Qutex: Add gridlock detection

This commit is contained in:
2025-09-29 13:38:53 -04:00
parent 6b4fe05fc0
commit 462247d743
3 changed files with 110 additions and 1 deletions
+6
View File
@@ -45,6 +45,12 @@ public:
*/
virtual void awaken(bool forceAwaken = false) = 0;
/* These two are used to iterate through the lockset of a Lockvoker in a
 * template-erased manner. We use them in the gridlock detection algorithm.
 *
 * getLockSetSize(): number of locks in the Lockvoker's lockset.
 * getLockAt(index): the Qutex stored at position `index` of that lockset.
 * Callers are expected to pass index < getLockSetSize(); whether an
 * out-of-range index is checked is up to the overriding class.
 */
virtual size_t getLockSetSize() const = 0;
virtual Qutex& getLockAt(size_t index) const = 0;
/**
* @brief Equality operator
*
@@ -175,6 +175,15 @@ public:
target->getIoService().post(*this);
}
/* Number of entries in the required LockSet of the continuation this
 * Lockvoker belongs to.
 */
size_t getLockSetSize() const override
{
    const auto& lockEntries = serializedContinuation.requiredLocks.locks;
    return lockEntries.size();
}
/* Returns the Qutex referenced by the entry at position `index` of the
 * required LockSet. No range check is performed on `index` here.
 */
Qutex& getLockAt(size_t index) const override
{
    auto&& lockEntry = serializedContinuation.requiredLocks.locks[index];
    return lockEntry.first.get();
}
private:
// Allow awakening by resetting the awake flag
void allowAwakening()
+95 -1
View File
@@ -64,7 +64,101 @@ SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
    /** EXPLANATION:
     * In this function we check for gridlocks which are slightly different
     * from deadlocks. In a gridlock, two requests are waiting for locks that
     * are held by the other. I.e:
     *
     *	R1 holds LockA and is waiting for LockB.
     *	R2 holds LockB and is waiting for LockA.
     *
     * This differs from deadlocks because it's not a single request which is
     * attempting to re-acquire a lock that it already holds.
     *
     * To detect this condition, we wait until the acquisition timeout has
     * expired. Then: we extract the current owner of the first lock we're
     * failing to acquire.
     *
     * From there, we go through each of the locks in the foreign owner's
     * current (i.e: immediate, most recent continuation's) required LockSet.
     * For each of the locks in the foreign owner's most immediate required
     * LockSet, we trace backward in our *OWN* history to see if any of *OUR*
     * continuations (excluding our most immediate continuation) contains that
     * lock.
     *
     * If we find a match, that means that we're holding a lock that the foreign
     * owner is waiting for. And we already know that the foreign owner is
     * holding a lock that we're waiting for (when we extracted the current
     * owner of the first failed lock in our most immediate Lockset).
     *
     * Hence, we have a gridlock.
     *
     * Returns true (after printing a diagnostic to std::cout) when such a
     * match is found; returns false when firstFailedQutex has no current
     * owner or when no match is found.
     */
    LockerAndInvokerBase* foreignOwnerPtr = firstFailedQutex.getCurrOwner();

    // If no current owner, can't be a gridlock
    if (foreignOwnerPtr == nullptr)
        { return false; }

    // Use reference for the rest of the function for safety.
    LockerAndInvokerBase& foreignOwner = *foreignOwnerPtr;

    /* For each lock in the foreign owner's LockSet, check if we hold it
     * in any of our previous continuations (excluding our most immediate one)
     */
    for (size_t i = 0; i < foreignOwner.getLockSetSize(); ++i)
    {
        Qutex& foreignLock = foreignOwner.getLockAt(i);

        /* Skip the firstFailedQutex since we already know the foreign owner
         * holds it -- hence it's impossible for any of our previous
         * continuations to hold it.
         */
        if (&foreignLock == &firstFailedQutex)
            { continue; }

        /** EXPLANATION:
         * Trace backward through our continuation history (excluding our most
         * immediate continuation).
         *
         * The reason we exclude our most immediate continuation is because the
         * LockSet acquisition algorithm backs off if it fails to acquire ALL
         * locks in the set. So if the lock that the foreign owner is waiting
         * for is in our most immediate continuation, and NOT in one of our
         * previous continuations, then we will back off and the foreign owner
         * should eventually be able to acquire that lock.
         */
        for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
                this->serializedContinuation.getCallersContinuation();
            currContin != nullptr;
            currContin = currContin->getCallersContinuation())
        {
            /* NOTE(review): this cast only succeeds for links that are a
             * SerializedAsynchronousContinuation instantiated with *our*
             * OriginalCbFnT. Links of any other type are skipped, so locks
             * they hold are not considered -- confirm this is intended.
             */
            auto serializedCont = std::dynamic_pointer_cast<
                SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);

            if (serializedCont == nullptr) { continue; }

            /* Check if this continuation holds the foreign lock.
             * getLockUsageDesc() signals "not in this LockSet" by throwing
             * std::runtime_error; only the lookup result's existence matters,
             * so the returned descriptor is discarded.
             *
             * The try block covers the lookup ONLY. Keeping the diagnostic
             * output outside of it ensures that a runtime_error-derived
             * exception raised while logging can't be mistaken for "this
             * continuation doesn't hold the lock".
             */
            bool holdsForeignLock = true;
            try {
                static_cast<void>(
                    serializedCont->requiredLocks
                        .getLockUsageDesc(foreignLock));
            } catch (const std::runtime_error&) {
                // This continuation doesn't hold the foreign lock.
                holdsForeignLock = false;
            }

            if (!holdsForeignLock) { continue; }

            // Matched! We hold a lock that the foreign owner is waiting for
            std::cout << __func__ << ": Gridlock detected: We hold lock @"
                << &foreignLock << " (" << foreignLock.name << ") in "
                "continuation @" << serializedCont.get()
                << ", while foreign owner @" << &foreignOwner
                << " holds lock @" << &firstFailedQutex << " ("
                << firstFailedQutex.name << ") that we're waiting for"
                << std::endl;

            return true;
        }
    }

    return false;
}