// NOTE(review): in this view the three #include directives below carry no
// header names, and the `template` keywords as well as the uses of
// std::shared_ptr, std::dynamic_pointer_cast and std::chrono::duration_cast
// carry no template parameter/argument lists. Presumably they were lost in
// extraction -- TODO restore them from the original file.
#include
#include
#include

namespace smo {

#ifdef CONFIG_ENABLE_DEBUG_LOCKS

/**
 * Walks the chain of caller continuations behind this Lockvoker and reports
 * whether any SerializedAsynchronousContinuation in that chain lists
 * firstFailedQutex in its requiredLocks.
 *
 * @param firstFailedQutex The qutex whose acquisition failed.
 * @return true as soon as an ancestor continuation yields a lock usage
 *         descriptor for firstFailedQutex (a report is printed to
 *         std::cout); false if the end of the chain is reached first.
 */
template
template
bool SerializedAsynchronousContinuation
::LockerAndInvoker
::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex)
{
    /** EXPLANATION:
     * In this function we will trace through the chain of continuations that
     * led up to this Lockvoker's continuation. For each continuation which is
     * a SerializedAsynchronousContinuation, we check through its LockSet to see
     * if it contains the lock that failed acquisition. If it does, we have a
     * deadlock.
     */

    /* We can't start with the continuation directly referenced by this starting
     * Lockvoker as it would contain all the locks we're currently trying to
     * acquire...and rightly so because it's the continuation for this current
     * lockvoker.
     */
    for (std::shared_ptr currContin =
            this->serializedContinuation.getCallersContinuationShPtr();
        currContin != nullptr;
        currContin = currContin->getCallersContinuationShPtr())
    {
        // Chain links that are not SerializedAsynchronousContinuations make
        // the cast yield nullptr; those links are skipped.
        auto serializedCont = std::dynamic_pointer_cast<
            SerializedAsynchronousContinuation>(currContin);

        if (serializedCont == nullptr) { continue; }

        // Check if the firstFailedQutex is in this continuation's LockSet
        try {
            // The returned descriptor is never read; the lookup is performed
            // only to find out whether it throws.
            const auto& lockUsageDesc = serializedCont->requiredLocks
                .getLockUsageDesc(firstFailedQutex);
        } catch (const std::runtime_error& e) {
            // A std::runtime_error is treated as "not in this LockSet"
            // (presumably that is what getLockUsageDesc signals -- confirm):
            // its message goes to std::cerr and the walk moves on to the
            // next link.
            std::cerr << __func__ << ": " << e.what() << std::endl;
            continue;
        }

        // Lookup succeeded: an ancestor continuation already lists the lock
        // we failed to acquire.
        std::cout << __func__ << ":Deadlock detected: Found "
            << "firstFailedQutex @" << &firstFailedQutex
            << " (" << firstFailedQutex.name << ") in LockSet of "
            << "SerializedAsynchronousContinuation @"
            << serializedCont.get() << std::endl;

        return true;
    }

    return false;
}

/**
 * Checks whether this Lockvoker and the current owner of firstFailedQutex
 * appear to be blocking each other.
 *
 * For every lock in the owner's lock set (other than firstFailedQutex itself),
 * the caller-continuation chain of this Lockvoker is searched for a
 * SerializedAsynchronousContinuation whose requiredLocks contains that lock.
 *
 * NOTE(review): the `obsolete::` qualifier suggests this routine has been
 * superseded -- confirm before relying on it.
 *
 * @param firstFailedQutex The qutex this Lockvoker failed to acquire.
 * @return true on the first match (a report is printed to std::cout);
 *         false if firstFailedQutex has no current owner or nothing matches.
 */
template
template
bool SerializedAsynchronousContinuation
::LockerAndInvoker
::obsolete::traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
    /** EXPLANATION:
     * In this function we check for gridlocks which are slightly different
     * from deadlocks. In a gridlock, two requests are waiting for locks that
     * are held by the other. I.e:
     *
     * R1 holds LockA and is waiting for LockB.
     * R2 holds LockB and is waiting for LockA.
     *
     * This differs from deadlocks because it's not a single request which is
     * attempting to re-acquire a lock that it already holds.
     *
     * To detect this condition, we wait until the acquisition timeout has
     * expired. Then: we extract the current owner of the first lock we're
     * failing to acquire.
     *
     * From there, we go through each of the locks in the foreign owner's
     * current (i.e: immediate, most recent continuation's) required LockSet.
     * For each of the locks in the foreign owner's most immediate required
     * LockSet, we trace backward in our *OWN* history to see if any of *OUR*
     * continuations (excluding our most immediate continuation) contains that
     * lock.
     *
     * If we find a match, that means that we're holding a lock that the foreign
     * owner is waiting for. And we already know that the foreign owner is
     * holding a lock that we're waiting for (when we extracted the current
     * owner of the first failed lock in our most immediate Lockset).
     *
     * Hence, we have a gridlock.
     */
    std::shared_ptr foreignOwnerShPtr = firstFailedQutex.getCurrOwner();

    // If no current owner, can't be a gridlock
    if (foreignOwnerShPtr == nullptr) { return false; }

    // Use reference for the rest of the function for safety.
    LockerAndInvokerBase &foreignOwner = *foreignOwnerShPtr;

    /* For each lock in the foreign owner's LockSet, check if we hold it
     * in any of our previous continuations (excluding our most immediate one)
     */
    for (size_t i = 0; i < foreignOwner.getLockSetSize(); ++i)
    {
        Qutex& foreignLock = foreignOwner.getLockAt(i);

        /* Skip the firstFailedQutex since we already know the foreign owner
         * holds it -- hence it's impossible for any of our previous
         * continuations to hold it.
         */
        if (&foreignLock == &firstFailedQutex) { continue; }

        /** EXPLANATION:
         * Trace backward through our continuation history (excluding our most
         * immediate continuation).
         *
         * The reason we exclude our most immediate continuation is because the
         * LockSet acquisition algorithm backs off if it fails to acquire ALL
         * locks in the set. So if the lock that the foreign owner is waiting
         * for is in our most immediate continuation, and NOT in one of our
         * previous continuations, then we will back off and the foreign owner
         * should eventually be able to acquire that lock.
         */
        for (std::shared_ptr currContin =
                this->serializedContinuation.getCallersContinuationShPtr();
            currContin != nullptr;
            currContin = currContin->getCallersContinuationShPtr())
        {
            // Links that are not SerializedAsynchronousContinuations make
            // the cast yield nullptr and are skipped.
            auto serializedCont = std::dynamic_pointer_cast<
                SerializedAsynchronousContinuation>(currContin);

            if (serializedCont == nullptr) { continue; }

            // Check if this continuation holds the foreign lock
            try {
                // The descriptor is never read; reaching the next statement
                // without an exception is what counts as a match.
                const auto& lockUsageDesc = serializedCont->requiredLocks
                    .getLockUsageDesc(foreignLock);

                // Matched! We hold a lock that the foreign owner is waiting for
                std::cout << __func__ << ": Gridlock detected: We hold lock @"
                    << &foreignLock << " (" << foreignLock.name << ") in "
                    "continuation @" << serializedCont.get()
                    << ", while foreign owner @" << &foreignOwner
                    << " holds lock @" << &firstFailedQutex
                    << " (" << firstFailedQutex.name
                    << ") that we're waiting for"
                    << std::endl;

                return true;
            } catch (const std::runtime_error& e) {
                // This continuation doesn't hold the foreign lock. Continue.
                // (Unlike the deadlock tracer, nothing is logged here.)
                continue;
            }
        }
    }

    return false;
}

/** EXPLANATION - GRIDLOCK DETECTION ALGORITHM:
 * This file implements gridlock detection algorithms that use a central
 * acquisition history to track all lockvokers suspected of being gridlocked.
 *
 * NOTE: the two functions that follow this comment
 * (heuristicallyTraceContinuationHistoryForGridlockOn and
 * completelyTraceContinuationHistoryForGridlockOn) currently only return
 * false; the algorithm described here is not yet implemented in them.
 *
 * ALGORITHM OVERVIEW:
 * 1. When a lockvoker finds that DEADLOCK_TIMEOUT_MS has elapsed and it
 *    still can't acquire a particular lock (firstFailedQutex), it creates
 *    a new entry in a global acquisition history.
 *
 * 2. The acquisition history is an unordered_map with:
 *    - Key: std::shared_ptr to the timed-out lockvoker (i.e. itself)
 *    - Value: std::pair<
 *          std::reference_wrapper<Qutex>,
 *          std::forward_list<std::reference_wrapper<Qutex>>>
 *      (template arguments reconstructed from the field descriptions
 *      below -- TODO confirm against the declaration)
 *
 *      pair.first: The firstFailedQutex that this lockvoker WANTS but
 *      can't acquire. This metadata is essential for later-arriving
 *      entrants to analyze what their predecessor timed-out sequences
 *      want.
 *
 *      pair.second: A list of all acquired Qutexes in this lockvoker's
 *      continuation history.
 *
 * 3. Each timed-out lockvoker:
 *    a) Adds itself to the acquisition history map with its wanted lock and
 *       acquired locks
 *    b) Iterates through all OTHER entries in the map (excluding itself)
 *    c) For each other entry, checks if that entry's acquired locks
 *       (pair.second) contain the lock that this lockvoker wants
 *       (aka: firstFailedQutex/pair.first)
 *    d) If found, we have detected a gridlock: two sequences where at least
 *       one wants a lock held by the other, and the other wants a lock that
 *       it can't acquire.
 *
 * GRIDLOCK CONDITION:
 * A gridlock exists when we find a circular chain of dependencies:
 * - Lockvoker A wants LockX but can't acquire it (held by Lockvoker B)
 * - Lockvoker B wants LockY but can't acquire it (held by Lockvoker C, D, etc.)
 * - The chain must be circular (eventually leading back to Lockvoker A or another
 *   lockvoker in the chain) to ensure it's a true gridlock, not just a delay
 *
 * TIMED DELAY, I/O DELAY, or LONG-RUNNING OPERATION FALSE-POSITIVE:
 * Without circularity detection, we could incorrectly flag a simple delay, I/O
 * delay, or long-running operation as a gridlock. For example: Lockvoker A
 * wants LockX (held by Lockvoker B), and Lockvoker B is currently in a 10-second
 * sleep/delay. When B wakes up, it will release LockX, allowing A to proceed.
 * This is not a gridlock - it's just A waiting longer than DEADLOCK_TIMEOUT_MS
 * for B to finish its work. True gridlocks require circular dependencies where
 * no sequence can make progress because they're all waiting for each other in
 * a cycle.
 *
 * The central history metadata enables us to detect complex gridlocks involving
 * multiple lockvokers (2, 3, 4, 5+ sequences) by building up the acquisition
 * history over time as different lockvokers time out and add their information.
 */

/**
 * Heuristic gridlock check for firstFailedQutex.
 *
 * Currently a placeholder: the body contains no detection logic.
 *
 * @param firstFailedQutex The qutex that could not be acquired (not used by
 *                         the current body).
 * @return Always false.
 */
template
template
bool SerializedAsynchronousContinuation
::LockerAndInvoker
::heuristicallyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
    /** HEURISTIC APPROACH:
     * Due to the computational complexity of full circularity detection,
     * we implement a heuristically adequate check: when we find 2 sequences
     * where one depends on the other, and the other has reached timeout,
     * we assume this is a likely gridlock. This approach is not
     * algorithmically complete (it may miss some complex circular
     * dependencies or flag false positives), but it is heuristically useful
     * for debugging and identifying potential concurrency issues in
     * practice.
     *
     * See the file-local comment above for the complete algorithm
     * explanation.
     */

    return false;
}

/**
 * Algorithmically complete gridlock check for firstFailedQutex.
 *
 * Currently a placeholder: the body contains no detection logic.
 *
 * @param firstFailedQutex The qutex that could not be acquired (not used by
 *                         the current body).
 * @return Always false.
 */
template
template
bool SerializedAsynchronousContinuation
::LockerAndInvoker
::completelyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
    /** ALGORITHMICALLY COMPLETE VERSION:
     * This function is intended to implement the algorithmically complete
     * version of gridlock detection that performs full circularity detection.
     * This would involve building a dependency graph from the acquisition
     * history and using graph traversal algorithms (such as DFS with cycle
     * detection) to identify true circular dependencies.
     *
     * See the file-local comment above for the complete algorithm
     * explanation.
     */

    return false;
}

/**
 * Reports a deadlock on std::cerr.
 *
 * The message contains the time elapsed between this Lockvoker's
 * creationTimestamp and now (measured with std::chrono::steady_clock and
 * printed with an "ms" suffix), followed by the address and name of the
 * qutex that could not be acquired. The function only logs; it performs no
 * other action.
 *
 * @param firstFailedQutex The qutex whose acquisition failed.
 */
template
template
void SerializedAsynchronousContinuation
::LockerAndInvoker
::handleDeadlock(Qutex& firstFailedQutex)
{
    std::cerr << __func__ << ": Deadlock: "
        << "Lockvoker has been waiting for "
        << std::chrono::duration_cast(
            std::chrono::steady_clock::now() - this->creationTimestamp)
            .count()
        << "ms, failed on qutex @" << &firstFailedQutex
        << " (" << firstFailedQutex.name << ")" << std::endl;
}

#endif

// Explicit template instantiations for the types we need
// Add more as needed for your specific use cases

} // namespace smo