Files
salmanoff/smocore/serializedAsynchronousContinuation.cpp
T
hayodea 71564b4d83 Add QutexAcquisitionHistoryTracker; integrate plumbing
We add the new Qutex acquisition history tracker that allows us
to dynamically detect qutex gridlocks. We've integrated it into
LockerAndInvoker::operator() in a preliminary way.

We also moved all of the trace*ForGridlockOn() methods into the
new QutexAcquisitionHistoryTracker singleton class. They're
more appropriately located there. They're still unimplemented
though.
2025-09-29 19:27:02 -04:00

223 lines
7.6 KiB
C++

#include <config.h>
#include <serializedAsynchronousContinuation.h>
#include <qutex.h>
namespace smo {
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex)
{
/** EXPLANATION:
* In this function we will trace through the chain of continuations that
* led up to this Lockvoker's continuation. For each continuation which is
* a SerializedAsynchronousContinuation, we check through its LockSet to see
* if it contains the lock that failed acquisition. If it does, we have a
* deadlock.
*/
/* We can't start with the continuation directly referenced by this starting
* Lockvoker as it would contain the all locks we're currently trying to
* acquire...and rightly so because it's the continuation for this current
* lockvoker.
*/
for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
this->serializedContinuation.getCallersContinuationShPtr();
currContin != nullptr;
currContin = currContin->getCallersContinuationShPtr())
{
auto serializedCont = std::dynamic_pointer_cast<
SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);
if (serializedCont == nullptr) { continue; }
// Check if the firstFailedQutex is in this continuation's LockSet
try {
const auto& lockUsageDesc = serializedCont->requiredLocks
.getLockUsageDesc(firstFailedQutex);
} catch (const std::runtime_error& e) {
std::cerr << __func__ << ": " << e.what() << std::endl;
continue;
}
std::cout << __func__ << ":Deadlock detected: Found "
<< "firstFailedQutex @" << &firstFailedQutex
<< " (" << firstFailedQutex.name << ") in LockSet of "
<< "SerializedAsynchronousContinuation @"
<< serializedCont.get() << std::endl;
return true;
}
return false;
}
/**
 * Checks whether we and the current owner of firstFailedQutex are each
 * waiting on a lock listed by the other (a "gridlock").
 *
 * @param firstFailedQutex The qutex whose acquisition failed.
 * @return true if any lock in the foreign owner's LockSet (other than
 *         firstFailedQutex itself) is found in the requiredLocks of one of
 *         our ancestor SerializedAsynchronousContinuations (the finding is
 *         reported on std::cout); false if firstFailedQutex has no current
 *         owner or no such lock is found.
 *
 * NOTE(review): the `obsolete::` qualifier in the definition name has to
 * match a nested scope declared in the header, which is not visible from
 * this file -- confirm it still exists.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::obsolete::traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
	/** EXPLANATION:
	 * In this function we check for gridlocks which are slightly different
	 * from deadlocks. In a gridlock, two requests are waiting for locks that
	 * are held by the other. I.e:
	 *
	 *	R1 holds LockA and is waiting for LockB.
	 *	R2 holds LockB and is waiting for LockA.
	 *
	 * This differs from deadlocks because it's not a single request which is
	 * attempting to re-acquire a lock that it already holds.
	 *
	 * To detect this condition, we wait until the acquisition timeout has
	 * expired. Then: we extract the current owner of the first lock we're
	 * failing to acquire.
	 *
	 * From there, we go through each of the locks in the foreign owner's
	 * current (i.e: immediate, most recent continuation's) required LockSet.
	 * For each of the locks in the foreign owner's most immediate required
	 * LockSet, we trace backward in our *OWN* history to see if any of *OUR*
	 * continuations (excluding our most immediate continuation) contains
	 * that lock.
	 *
	 * If we find a match, that means that we're holding a lock that the
	 * foreign owner is waiting for. And we already know that the foreign
	 * owner is holding a lock that we're waiting for (when we extracted the
	 * current owner of the first failed lock in our most immediate LockSet).
	 *
	 * Hence, we have a gridlock.
	 */
	std::shared_ptr<LockerAndInvokerBase> foreignOwnerShPtr =
		firstFailedQutex.getCurrOwner();

	// If no current owner, can't be a gridlock
	if (foreignOwnerShPtr == nullptr)
		{ return false; }

	/* Use a reference for the rest of the function; foreignOwnerShPtr stays
	 * in scope, so the referenced object is kept alive until we return.
	 */
	LockerAndInvokerBase &foreignOwner = *foreignOwnerShPtr;

	/* For each lock in the foreign owner's LockSet, check if we hold it
	 * in any of our previous continuations (excluding our most immediate one)
	 */
	for (size_t i = 0; i < foreignOwner.getLockSetSize(); ++i)
	{
		Qutex& foreignLock = foreignOwner.getLockAt(i);

		/* Skip the firstFailedQutex since we already know the foreign owner
		 * holds it -- hence it's impossible for any of our previous
		 * continuations to hold it.
		 */
		if (&foreignLock == &firstFailedQutex)
			{ continue; }

		/** EXPLANATION:
		 * Trace backward through our continuation history (excluding our
		 * most immediate continuation).
		 *
		 * The reason we exclude our most immediate continuation is because
		 * the LockSet acquisition algorithm backs off if it fails to acquire
		 * ALL locks in the set. So if the lock that the foreign owner is
		 * waiting for is in our most immediate continuation, and NOT in one
		 * of our previous continuations, then we will back off and the
		 * foreign owner should eventually be able to acquire that lock.
		 */
		for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
				this->serializedContinuation.getCallersContinuationShPtr();
			currContin != nullptr;
			currContin = currContin->getCallersContinuationShPtr())
		{
			// Chain links that aren't serialized continuations are skipped.
			auto serializedCont = std::dynamic_pointer_cast<
				SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);
			if (serializedCont == nullptr) { continue; }

			/* Check if this continuation holds the foreign lock. Only the
			 * lookup sits inside the try so that an unrelated
			 * std::runtime_error (e.g. from the stream below) can't be
			 * mistaken for "lock not found". The returned descriptor itself
			 * isn't needed and is deliberately discarded.
			 */
			try {
				static_cast<void>(serializedCont->requiredLocks
					.getLockUsageDesc(foreignLock));
			} catch (const std::runtime_error&) {
				// This continuation doesn't hold the foreign lock. Continue.
				continue;
			}

			// Matched! We hold a lock that the foreign owner is waiting for
			std::cout << __func__ << ": Gridlock detected: We hold lock @"
				<< &foreignLock << " (" << foreignLock.name << ") in "
				"continuation @" << serializedCont.get()
				<< ", while foreign owner @" << &foreignOwner
				<< " holds lock @" << &firstFailedQutex << " ("
				<< firstFailedQutex.name << ") that we're waiting for"
				<< std::endl;
			return true;
		}
	}

	return false;
}
/**
 * Reports a deadlock on std::cerr.
 *
 * The message states how many milliseconds have elapsed since this
 * Lockvoker's creationTimestamp and identifies the qutex (address and name)
 * whose acquisition failed. Nothing other than logging is done here.
 *
 * @param firstFailedQutex The qutex whose acquisition failed.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
void
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::handleDeadlock(Qutex& firstFailedQutex)
{
	// Time spent waiting so far, measured on the monotonic clock.
	const auto timeWaited =
		std::chrono::steady_clock::now() - this->creationTimestamp;
	const auto msWaited =
		std::chrono::duration_cast<std::chrono::milliseconds>(timeWaited)
			.count();

	std::cerr << __func__ << ": Deadlock: "
		<< "Lockvoker has been waiting for "
		<< msWaited
		<< "ms, failed on qutex @" << &firstFailedQutex
		<< " (" << firstFailedQutex.name << ")" << std::endl;
}
#endif
/**
 * Gathers the qutexes listed in the LockSets of every
 * SerializedAsynchronousContinuation above this one in the caller chain.
 *
 * This continuation's own locks are intentionally left out: it is the one
 * failing to acquire locks and backing off, so the walk begins at its
 * caller.
 *
 * @return A newly allocated forward_list of references to the collected
 *         qutexes. Entries are pushed to the front as they are visited, so
 *         the list is in reverse visiting order. The list is empty if no
 *         ancestor is a SerializedAsynchronousContinuation.
 */
template <class OriginalCbFnT>
std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>
SerializedAsynchronousContinuation<OriginalCbFnT>::getAcquiredQutexHistory()
const
{
	using QutexRefList = std::forward_list<std::reference_wrapper<Qutex>>;

	auto history = std::make_unique<QutexRefList>();

	std::shared_ptr<AsynchronousContinuationChainLink> link =
		this->getCallersContinuationShPtr();

	while (link != nullptr)
	{
		// Only serialized continuations carry a LockSet; skip other links.
		auto ancestor = std::dynamic_pointer_cast<
			SerializedAsynchronousContinuation<OriginalCbFnT>>(link);

		if (ancestor != nullptr)
		{
			// The first member of each LockSet entry is the qutex.
			for (auto& lockEntry : ancestor->requiredLocks.locks)
				{ history->push_front(lockEntry.first); }
		}

		link = link->getCallersContinuationShPtr();
	}

	return history;
}
// Explicit template instantiations for the types we need
// Add more as needed for your specific use cases
} // namespace smo