Locking: Add contin tracing to detect deadlocks

We added code to SerializedAsynchronousContinuation that traces all the
contins linked to a particular Lockvoker. This basically ensures that
we'll almost never have to deal with a deadlock going undetected. So cool.
This commit is contained in:
2025-09-27 20:51:20 -04:00
parent 782bcd4567
commit f8bf8083af
4 changed files with 73 additions and 8 deletions
+5 -1
View File
@@ -38,7 +38,6 @@ public:
std::shared_ptr<AsynchronousContinuation<OriginalCbFnT>>
lifetimePreservingConveyance);
/** EXPLANATION:
* When an exception is thrown in an async callee, which pertains to an
* error in the data given by the caller, we ought not to throw the
@@ -67,6 +66,11 @@ public:
{ std::rethrow_exception(exception); }
}
/* AsynchronousContinuationChainLink override: hands back the
 * callerContinuation stored in originalCallback.
 */
virtual std::shared_ptr<AsynchronousContinuationChainLink>
getCallersContinuation() override
{
	return originalCallback.callerContinuation;
}
public:
Callback<OriginalCbFnT> originalCallback;
std::exception_ptr exception;
@@ -18,6 +18,9 @@ class AsynchronousContinuationChainLink
{
public:
// Virtual destructor so that links can be destroyed through a pointer to
// this base type.
virtual ~AsynchronousContinuationChainLink() = default;
/**
 * @brief Get the previous link in the continuation chain.
 * @return The continuation of this link's caller. Code that walks the chain
 *         stops on a null pointer, so presumably null marks the start of
 *         the chain -- confirm against implementers.
 */
virtual std::shared_ptr<AsynchronousContinuationChainLink>
getCallersContinuation() = 0;
};
} // namespace smo
+11 -3
View File
@@ -100,7 +100,14 @@ public:
{ return; }
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
handleLikelyDeadlock(*firstFailedQutexPtr);
Qutex &firstFailedQutex = *firstFailedQutexPtr;
bool isDeadlock = traceContinuationHistoryForDeadlockOn(
firstFailedQutex);
if (!isDeadlock)
{ return; }
handleDeadlock(firstFailedQutex);
#endif
return;
}
@@ -186,12 +193,13 @@ public:
#endif
}
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
/**
 * @brief Trace the chain of callers' continuations, looking for one whose
 *        LockSet contains the qutex that failed acquisition
 * @param firstFailedQutex The first qutex that failed acquisition
 * @return true if such a continuation was found (i.e. a deadlock),
 *         false otherwise
 */
bool traceContinuationHistoryForDeadlockOn(Qutex &firstFailedQutex);
/**
* @brief Handle a likely deadlock situation by logging debug information
* @param firstFailedQutex The first qutex that failed acquisition
*/
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
void handleLikelyDeadlock(Qutex& firstFailedQutex);
void handleDeadlock(Qutex& firstFailedQutex);
#endif
private:
+54 -4
View File
@@ -5,22 +5,72 @@
namespace smo {
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
/**
 * @brief Walk the callers' continuation chain looking for a continuation
 *        whose LockSet contains the qutex that failed acquisition.
 * @param firstFailedQutex The first qutex that failed acquisition
 * @return true if an ancestor SerializedAsynchronousContinuation's
 *         requiredLocks yields a usage descriptor for firstFailedQutex
 *         (reported as a deadlock on std::cout); false if the chain is
 *         exhausted without a match.
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
bool
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex)
{
	/** EXPLANATION:
	 * In this function we will trace through the chain of continuations that
	 * led up to this Lockvoker's continuation. For each continuation which is
	 * a SerializedAsynchronousContinuation, we check through its LockSet to see
	 * if it contains the lock that failed acquisition. If it does, we have a
	 * deadlock.
	 */

	/* We can't start with the continuation directly referenced by this starting
	 * Lockvoker as it would contain all the locks we're currently trying to
	 * acquire...and rightly so because it's the continuation for this current
	 * lockvoker.
	 */
	for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
			this->serializedContinuation.getCallersContinuation();
		currContin != nullptr;
		currContin = currContin->getCallersContinuation())
	{
		/* NOTE(review): this cast only succeeds for links that are
		 * SerializedAsynchronousContinuation instantiated with the *same*
		 * OriginalCbFnT as this Lockvoker. Serialized continuations in the
		 * chain that use a different callback type appear to be skipped
		 * here -- confirm whether that is intended.
		 */
		auto serializedCont = std::dynamic_pointer_cast<
			SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);

		// Links that hold no LockSet of this type cannot be inspected.
		if (serializedCont == nullptr) { continue; }

		// Check if the firstFailedQutex is in this continuation's LockSet
		try {
			/* Only whether the lookup succeeds matters; the descriptor itself
			 * is not needed, so it is discarded rather than bound to an unused
			 * local. Presumably the lookup throws std::runtime_error when the
			 * qutex is not in the set -- confirm against LockSet.
			 */
			static_cast<void>(serializedCont->requiredLocks
				.getLockUsageDesc(firstFailedQutex));
		} catch (const std::runtime_error& e) {
			std::cerr << __func__ << ": " << e.what() << std::endl;
			continue;
		}

		// The lookup succeeded: an ancestor continuation lists the failed qutex.
		std::cout << __func__ << ":Deadlock detected: Found "
			<< "firstFailedQutex @" << &firstFailedQutex
			<< " (" << firstFailedQutex.name << ") in LockSet of "
			<< "SerializedAsynchronousContinuation @"
			<< serializedCont.get() << std::endl;

		return true;
	}

	return false;
}
/**
 * @brief Report a deadlock on std::cerr.
 *
 * The message states how many milliseconds have elapsed (steady clock)
 * since this Lockvoker's creationTimestamp, plus the address and name of
 * the qutex that failed acquisition. Nothing else is done here.
 *
 * @param firstFailedQutex The first qutex that failed acquisition
 */
template <class OriginalCbFnT>
template <class InvocationTargetT>
void
SerializedAsynchronousContinuation<OriginalCbFnT>
::LockerAndInvoker<InvocationTargetT>
::handleLikelyDeadlock(Qutex& firstFailedQutex)
::handleDeadlock(Qutex& firstFailedQutex)
{
std::cerr << __func__ << ": Deadlock likely: "
std::cerr << __func__ << ": Deadlock: "
<< "Lockvoker has been waiting for "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - this->creationTimestamp)
.count()
<< "ms, failed on qutex @" << &firstFailedQutex
<< " (" << firstFailedQutex.name << ")" << std::endl;
}
#endif