Debug:Qutex: Add deadlock detection based on elapsed time

We now detect that a deadlock is likely when
DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS has elapsed since the
Lockvoker was created. This is the preliminary work required to do
a backtrace through the call stack and figure out if a deadlock has
really occurred.

To do this, we'd have to go through the async call chain and
search for a previous caller which acquired the same qutex as
the one that first failed during this Lockvoker LockSet acquisition
attempt.
This commit is contained in:
2025-09-21 15:11:28 -04:00
parent dbc9569775
commit d2ed525106
6 changed files with 59 additions and 9 deletions
+3
View File
@@ -12,6 +12,9 @@
/* World thread configuration */
#cmakedefine WORLD_USE_BODY_THREAD
/* Qutex deadlock detection configuration */
#define DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS @DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS@
/* Cross-compilation configuration */
#cmakedefine CMAKE_CROSSCOMPILING
+4 -1
View File
@@ -81,9 +81,12 @@ public:
/**
* @brief Try to acquire all locks in order; back off if acquisition fails
* @param lockvoker The LockerAndInvoker attempting to acquire the locks
* @param firstFailedQutex Output parameter to receive the first Qutex that
* failed acquisition (can be nullptr)
* @return true if all locks were acquired, false otherwise
*/
bool tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker);
bool tryAcquireOrBackOff(
LockerAndInvokerBase &lockvoker, Qutex *firstFailedQutex = nullptr);
void unregisterFromQutexQueues();
/**
+3 -2
View File
@@ -24,8 +24,8 @@ public:
/**
* @brief Constructor
*/
Qutex()
: isOwned(false)
Qutex(const std::string &_name)
: isOwned(false), name(_name)
{}
/**
@@ -88,6 +88,7 @@ public:
SpinLock lock;
LockerAndInvokerBase::List queue;
bool isOwned;
std::string name;
};
} // namespace smo
+34 -5
View File
@@ -4,6 +4,8 @@
#include <functional>
#include <memory>
#include <atomic>
#include <chrono>
#include <iostream>
#include <componentThread.h>
#include <lockSet.h>
#include <asynchronousContinuation.h>
@@ -63,7 +65,8 @@ public:
: LockerAndInvokerBase(&serializedContinuation),
serializedContinuation(serializedContinuation),
target(target),
invocationTarget(std::move(invocationTarget))
invocationTarget(std::move(invocationTarget)),
creationTimestamp(std::chrono::steady_clock::now())
{
firstWake();
}
@@ -81,11 +84,28 @@ public:
"executing on wrong ComponentThread");
}
Qutex *firstFailedQutexPtr = nullptr;
bool isDeadlockLikely = isDeadlockLikely();
if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
*this))
*this, (isDeadlockLikely ? &firstFailedQutexPtr : nullptr)))
{
// Just allow this lockvoker to be dropped from its io_service.
allowAwakening();
if (!isDeadlockLikely)
{ return; }
Qutex &firstFailedQutex = *firstFailedQutexPtr;
std::cerr << __func__ << ": Deadlock likely: "
<< "Lockvoker has been waiting for "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - creationTimestamp)
.count()
<< "ms, failed on qutex @" << &firstFailedQutex
<< " (" << firstFailedQutex.name << ")" << std::endl;
return;
}
@@ -137,9 +157,8 @@ public:
target->getIoService().post(*this);
}
/**
* @brief Allow awakening by resetting the awake flag
*/
private:
// Clear the isAwakeOrBeingAwakened flag on the serialized continuation,
// which permits this lockvoker to be awakened again.
void allowAwakening()
{
    serializedContinuation.isAwakeOrBeingAwakened.store(false);
}
@@ -158,11 +177,21 @@ public:
awaken(true);
}
// Report whether at least DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS
// milliseconds have passed since creationTimestamp was recorded.
// The elapsed time is truncated to whole milliseconds before comparing.
bool isDeadlockLikely() const
{
    using std::chrono::milliseconds;
    using std::chrono::steady_clock;

    const milliseconds waited = std::chrono::duration_cast<milliseconds>(
        steady_clock::now() - creationTimestamp);

    return waited.count() >= DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS;
}
private:
SerializedAsynchronousContinuation<OriginalCbFnT>
&serializedContinuation;
InvocationTargetT invocationTarget;
std::shared_ptr<ComponentThread> target;
std::chrono::steady_clock::time_point creationTimestamp;
};
};