Locking: Add basic reactive deadlock detection foundation

We added a timestamp to each Lockvoker so that we can detect when
a Lockvoker has been waiting on a qutex for "too long", where
"too long" is arbitrarily defined as 500ms.

Next, we're going to change the way we create callbacks so that
we can access the shared_ptr to the async continuation
(sh_ptr<AsyncContin>) more explicitly via the callback object.
This commit is contained in:
2025-09-22 20:45:36 -04:00
parent d2ed525106
commit 092a0954a0
5 changed files with 53 additions and 31 deletions
+28 -19
View File
@@ -1,6 +1,7 @@
#ifndef SERIALIZED_ASYNCHRONOUS_CONTINUATION_H
#define SERIALIZED_ASYNCHRONOUS_CONTINUATION_H
#include <config.h>
#include <functional>
#include <memory>
#include <atomic>
@@ -63,10 +64,12 @@ public:
const std::shared_ptr<ComponentThread>& target,
InvocationTargetT invocationTarget)
: LockerAndInvokerBase(&serializedContinuation),
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
creationTimestamp(std::chrono::steady_clock::now()),
#endif
serializedContinuation(serializedContinuation),
target(target),
invocationTarget(std::move(invocationTarget)),
creationTimestamp(std::chrono::steady_clock::now())
invocationTarget(std::move(invocationTarget))
{
firstWake();
}
@@ -85,27 +88,19 @@ public:
}
Qutex *firstFailedQutexPtr = nullptr;
bool isDeadlockLikely = isDeadlockLikely();
bool deadlockLikely = isDeadlockLikely();
if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
*this, (isDeadlockLikely ? &firstFailedQutexPtr : nullptr)))
*this, (deadlockLikely ? &firstFailedQutexPtr : nullptr)))
{
// Just allow this lockvoker to be dropped from its io_service.
allowAwakening();
if (!isDeadlockLikely)
if (!deadlockLikely)
{ return; }
Qutex &firstFailedQutex = *firstFailedQutexPtr;
std::cerr << __func__ << ": Deadlock likely: "
<< "Lockvoker has been waiting for "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - creationTimestamp)
.count()
<< "ms, failed on qutex @" << &firstFailedQutex
<< " (" << firstFailedQutex.name << ")" << std::endl;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
handleLikelyDeadlock(*firstFailedQutexPtr);
#endif
return;
}
@@ -177,21 +172,35 @@ public:
awaken(true);
}
// Check if CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS has elapsed since creation
// Has CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS elapsed since creation?
bool isDeadlockLikely() const
{
// The timeout check is compiled in only when CONFIG_ENABLE_DEBUG_LOCKS is
// defined; otherwise this function unconditionally reports "no deadlock".
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
// steady_clock is monotonic, so the elapsed time is unaffected by
// wall-clock adjustments. creationTimestamp is captured in the
// constructor's initializer list.
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
now - creationTimestamp);
// NOTE(review): two consecutive return statements appear here; they look
// like the before/after lines of a rendered diff (the macro was renamed
// to carry the CONFIG_DEBUG_ prefix). Read literally, only the first one
// executes -- confirm which macro name is current.
return elapsed.count() >= DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS;
return elapsed.count() >= CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS;
#else
// Debug locks disabled: never report a likely deadlock.
return false;
#endif
}
/**
* @brief Handle a likely deadlock situation by logging debug information
* @param firstFailedQutex The first qutex that failed acquisition
*
* This member is declared only when CONFIG_ENABLE_DEBUG_LOCKS is defined,
* so any call to it must be wrapped in the same preprocessor guard.
* NOTE(review): the definition is not visible in this view; the "logging"
* behaviour described above is taken from the original comment -- confirm
* against the implementation.
*/
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
void handleLikelyDeadlock(Qutex& firstFailedQutex);
#endif
private:
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
std::chrono::steady_clock::time_point creationTimestamp;
#endif
SerializedAsynchronousContinuation<OriginalCbFnT>
&serializedContinuation;
InvocationTargetT invocationTarget;
std::shared_ptr<ComponentThread> target;
std::chrono::steady_clock::time_point creationTimestamp;
};
};