Files
salmanoff/smocore/serializedAsynchronousContinuation.cpp
T

287 lines
11 KiB
C++
Raw Normal View History

2025-09-22 21:29:43 -04:00
#include <config.h>
#include <serializedAsynchronousContinuation.h>
#include <qutex.h>
namespace smo {
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
/**	Walks the caller-continuation chain looking for a self-deadlock.
 *
 * @param firstFailedQutex The lock this Lockvoker failed to acquire.
 * @return true if an ancestor continuation's LockSet contains
 *	firstFailedQutex; false if the chain is exhausted without a match.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex)
{
	/** EXPLANATION:
	 * We walk backward through the chain of continuations that led up to
	 * this Lockvoker's continuation. Every link which is a
	 * SerializedAsynchronousContinuation has its LockSet queried for the
	 * lock whose acquisition failed. A hit means an ancestor in our own
	 * history already lists that lock: we have a deadlock.
	 *
	 * The walk starts at our *caller's* continuation, not at the
	 * continuation directly referenced by this Lockvoker: our own
	 * continuation necessarily contains all the locks we're currently
	 * trying to acquire, so it would always match.
	 */
	using SerializedContinT =
		SerializedAsynchronousContinuation<OriginalCbFnT>;

	std::shared_ptr<AsynchronousContinuationChainLink> link =
		this->serializedContinuation.getCallersContinuationShPtr();

	for (; link != nullptr; link = link->getCallersContinuationShPtr())
	{
		/* NOTE(review): the cast only matches links instantiated with the
		 * same OriginalCbFnT; any other link type is skipped. Confirm that
		 * this is intended.
		 */
		const auto ancestor =
			std::dynamic_pointer_cast<SerializedContinT>(link);
		if (!ancestor) { continue; }

		/* getLockUsageDesc() is used purely as a membership probe: a
		 * std::runtime_error from it is treated as "not in this LockSet".
		 * The returned descriptor itself is not needed.
		 */
		bool ancestorListsLock = true;
		try {
			static_cast<void>(
				ancestor->requiredLocks.getLockUsageDesc(firstFailedQutex));
		} catch (const std::runtime_error& e) {
			std::cerr << __func__ << ": " << e.what() << std::endl;
			ancestorListsLock = false;
		}

		if (!ancestorListsLock) { continue; }

		std::cout << __func__ << ":Deadlock detected: Found "
			<< "firstFailedQutex @" << &firstFailedQutex
			<< " (" << firstFailedQutex.name << ") in LockSet of "
			<< "SerializedAsynchronousContinuation @"
			<< ancestor.get() << std::endl;
		return true;
	}

	return false;
}
2025-09-29 12:58:41 -04:00
/**	Two-party gridlock check against the current owner of a failed lock.
 *
 * @param firstFailedQutex The lock this Lockvoker failed to acquire.
 * @return true if one of our earlier continuations lists a lock that also
 *	appears in the LockSet of firstFailedQutex's current owner; false if
 *	there is no current owner or no such lock is found.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::obsolete::traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
	/** EXPLANATION:
	 * Gridlocks are slightly different from deadlocks. In a gridlock, two
	 * requests each wait for a lock held by the other. I.e:
	 *
	 *	R1 holds LockA and is waiting for LockB.
	 *	R2 holds LockB and is waiting for LockA.
	 *
	 * This is not a single request attempting to re-acquire a lock that it
	 * already holds, which is what distinguishes it from a deadlock.
	 *
	 * Detection happens once the acquisition timeout has expired:
	 *	1. Extract the current owner of the first lock we're failing to
	 *	   acquire (the "foreign owner").
	 *	2. For each lock in the foreign owner's current (i.e: immediate,
	 *	   most recent continuation's) required LockSet, trace backward
	 *	   through our *OWN* history to see whether any of *OUR*
	 *	   continuations (excluding our most immediate one) contains it.
	 *
	 * A match means we're holding a lock the foreign owner is waiting for,
	 * and we already know the foreign owner holds a lock we're waiting for
	 * (it is the current owner of firstFailedQutex). Hence: gridlock.
	 */
	using SerializedContinT =
		SerializedAsynchronousContinuation<OriginalCbFnT>;

	const std::shared_ptr<LockerAndInvokerBase> ownerShPtr =
		firstFailedQutex.getCurrOwner();

	// Nobody currently owns the lock, so this can't be a gridlock.
	if (!ownerShPtr) { return false; }

	// Work through a reference from here on for safety.
	LockerAndInvokerBase &owner = *ownerShPtr;

	for (size_t lockIdx = 0; lockIdx < owner.getLockSetSize(); ++lockIdx)
	{
		Qutex& ownersLock = owner.getLockAt(lockIdx);

		/* The foreign owner is known to hold firstFailedQutex, so none of
		 * our previous continuations can possibly hold it. Skip it.
		 */
		if (&ownersLock == &firstFailedQutex) { continue; }

		/** EXPLANATION:
		 * Our most immediate continuation is excluded from the backward
		 * trace because the LockSet acquisition algorithm backs off when it
		 * fails to acquire ALL locks in the set. If the lock the foreign
		 * owner wants is only in our most immediate continuation (and NOT
		 * in an earlier one), we will back off and the foreign owner should
		 * eventually be able to acquire it.
		 */
		std::shared_ptr<AsynchronousContinuationChainLink> link =
			this->serializedContinuation.getCallersContinuationShPtr();

		for (; link != nullptr; link = link->getCallersContinuationShPtr())
		{
			const auto ancestor =
				std::dynamic_pointer_cast<SerializedContinT>(link);
			if (!ancestor) { continue; }

			try {
				/* Membership probe only: the call throwing
				 * std::runtime_error is how "not in this LockSet" is
				 * observed here; the descriptor itself is unused.
				 */
				static_cast<void>(
					ancestor->requiredLocks.getLockUsageDesc(ownersLock));

				// Matched! We hold a lock the foreign owner is waiting for.
				std::cout << __func__ << ": Gridlock detected: We hold lock @"
					<< &ownersLock << " (" << ownersLock.name
					<< ") in continuation @" << ancestor.get()
					<< ", while foreign owner @" << &owner
					<< " holds lock @" << &firstFailedQutex << " ("
					<< firstFailedQutex.name << ") that we're waiting for"
					<< std::endl;
				return true;
			} catch (const std::runtime_error&) {
				// This continuation doesn't hold the foreign lock. Move on
				// to the next link in our history.
			}
		}
	}

	return false;
}
/** EXPLANATION - GRIDLOCK DETECTION ALGORITHM:
* This file implements gridlock detection algorithms that use a central
* acquisition history to track all lockvokers suspected of being gridlocked.
*
* ALGORITHM OVERVIEW:
* 1. When a lockvoker finds that DEADLOCK_TIMEOUT_MS has elapsed and it
* still can't acquire a particular lock (firstFailedQutex), it creates
* a new entry in a global acquisition history.
*
* 2. The acquisition history is an unordered_map with:
* - Key: std::shared_ptr<LockerAndInvokerBase>
* (the timed-out lockvoker -- aka, itself)
* - Value: std::pair<
* std::reference_wrapper<Qutex>,
* std::forward_list<std::reference_wrapper<Qutex>>>
* * pair.first: The firstFailedQutex that this lockvoker WANTS but
* can't acquire. This metadata is essential for later-arriving
* entrants to analyze what their predecessor timed-out sequences
* want.
* * pair.second: A list of all acquired Qutexes in this lockvoker's
* continuation history.
*
* 3. Each timed-out lockvoker:
* a) Adds itself to the acquisition history map with its wanted lock and
* acquired locks
* b) Iterates through all OTHER entries in the map (excluding itself)
* c) For each other entry, checks if that entry's acquired locks
* (pair.second) contains the lock that this lockvoker wants
* (aka: firstFailedQutex/pair.first)
* d) If found, we have detected a gridlock: two sequences where at least
* one wants a lock held by the other, and the other wants a lock that
* it can't acquire.
*
* GRIDLOCK CONDITION:
* A gridlock exists when we find a circular chain of dependencies:
* - Lockvoker A wants LockX but can't acquire it (held by Lockvoker B)
* - Lockvoker B wants LockY but can't acquire it (held by Lockvoker C, D, etc.)
* - The chain must be circular (eventually leading back to Lockvoker A or another
* lockvoker in the chain) to ensure it's a true gridlock, not just a delay
*
* TIMED DELAY, I/O DELAY, or LONG-RUNNING OPERATION FALSE-POSITIVE:
* Without circularity detection, we could incorrectly flag a simple delay, I/O
* delay, or long-running operation as a gridlock. For example: Lockvoker A
* wants LockX (held by Lockvoker B), and Lockvoker B is currently in a 10-second
* sleep/delay. When B wakes up, it will release LockX, allowing A to proceed.
* This is not a gridlock - it's just A waiting longer than DEADLOCK_TIMEOUT_MS
* for B to finish its work. True gridlocks require circular dependencies where
* no sequence can make progress because they're all waiting for each other in
* a cycle.
*
* The central history metadata enables us to detect complex gridlocks involving
* multiple lockvokers (2, 3, 4, 5+ sequences) by building up the acquisition
* history over time as different lockvokers time out and add their information.
*/
/**	Heuristic gridlock check -- currently an unimplemented placeholder.
 *
 * @param firstFailedQutex The lock this Lockvoker failed to acquire.
 *	Not consulted by the current placeholder body.
 * @return Always false for now, i.e. no gridlock is ever reported.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::heuristicallyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
	/** HEURISTIC APPROACH (NOT YET IMPLEMENTED):
	 * Due to the computational complexity of full circularity detection,
	 * the intent is to implement a heuristically adequate check: when we
	 * find 2 sequences where one depends on the other, and the other has
	 * reached timeout, we assume this is a likely gridlock. This approach
	 * is not algorithmically complete (it may miss some complex circular
	 * dependencies or flag false positives), but it is heuristically useful
	 * for debugging and identifying potential concurrency issues in
	 * practice.
	 *
	 * See the file-local comment above for the complete algorithm
	 * explanation.
	 *
	 * TODO: The check described above has not been written yet. Until it
	 * is, this function unconditionally reports "no gridlock".
	 */

	// Explicitly discarded so the placeholder builds without an
	// unused-parameter warning.
	static_cast<void>(firstFailedQutex);

	return false;
}
/**	Complete (cycle-detecting) gridlock check -- currently an unimplemented
 *	placeholder.
 *
 * @param firstFailedQutex The lock this Lockvoker failed to acquire.
 *	Not consulted by the current placeholder body.
 * @return Always false for now, i.e. no gridlock is ever reported.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::completelyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
	/** ALGORITHMICALLY COMPLETE VERSION (NOT YET IMPLEMENTED):
	 * This function is intended to implement the algorithmically complete
	 * version of gridlock detection that performs full circularity
	 * detection. This would involve building a dependency graph from the
	 * acquisition history and using graph traversal algorithms (such as DFS
	 * with cycle detection) to identify true circular dependencies.
	 *
	 * See the file-local comment above for the complete algorithm
	 * explanation.
	 *
	 * TODO: None of the above has been written yet. Until it is, this
	 * function unconditionally reports "no gridlock".
	 */

	// Explicitly discarded so the placeholder builds without an
	// unused-parameter warning.
	static_cast<void>(firstFailedQutex);

	return false;
}
2025-09-22 21:29:43 -04:00
/**	Reports a detected deadlock on stderr.
 *
 * Prints how long this Lockvoker has existed (now minus creationTimestamp,
 * in milliseconds) together with the address and name of the lock that could
 * not be acquired. The body only logs; it takes no other action.
 *
 * @param firstFailedQutex The lock this Lockvoker failed to acquire.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
void
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::handleDeadlock(Qutex& firstFailedQutex)
{
	// Time elapsed since this Lockvoker was created.
	const auto sinceCreation =
		std::chrono::steady_clock::now() - this->creationTimestamp;
	const auto waitedMs =
		std::chrono::duration_cast<std::chrono::milliseconds>(sinceCreation)
			.count();

	std::cerr << __func__ << ": Deadlock: "
		<< "Lockvoker has been waiting for "
		<< waitedMs
		<< "ms, failed on qutex @" << &firstFailedQutex
		<< " (" << firstFailedQutex.name << ")" << std::endl;
}
#endif
// Explicit template instantiations for the types we need
// Add more as needed for your specific use cases
} // namespace smo