Debug:Qutex: Add deadlock detection based on elapsed time
We now detect that a deadlock is likely when DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS has elapsed since the Lockvoker was created. This is the preliminary work required to do a backtrace through the call stack and figure out whether a deadlock has really occurred. To do this, we'd have to go through the async call chain and search for a previous caller which acquired the same qutex as the one that first failed during this Lockvoker LockSet acquisition attempt.
This commit is contained in:
@@ -26,6 +26,14 @@ math(EXPR MIND_VOSCILLATOR_FREQ_MS "1000 / ${MIND_VOSCILLATOR_PERIOD_MS}")
|
|||||||
# World thread configuration
|
# World thread configuration
|
||||||
option(WORLD_USE_BODY_THREAD
|
option(WORLD_USE_BODY_THREAD
|
||||||
"Use body thread for world component instead of separate world thread" OFF)
|
"Use body thread for world component instead of separate world thread" OFF)
|
||||||
|
|
||||||
|
# Qutex deadlock detection configuration
|
||||||
|
set(DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS 500 CACHE STRING
|
||||||
|
"Timeout in milliseconds for deadlock detection in qutex system")
|
||||||
|
if(NOT DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS GREATER 0)
|
||||||
|
message(FATAL_ERROR "DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS must be a positive integer > 0")
|
||||||
|
endif()
|
||||||
|
|
||||||
# Test configuration
|
# Test configuration
|
||||||
option(ENABLE_TESTS "Enable building tests" OFF)
|
option(ENABLE_TESTS "Enable building tests" OFF)
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,9 @@
|
|||||||
/* World thread configuration */
|
/* World thread configuration */
|
||||||
#cmakedefine WORLD_USE_BODY_THREAD
|
#cmakedefine WORLD_USE_BODY_THREAD
|
||||||
|
|
||||||
|
/* Qutex deadlock detection configuration */
|
||||||
|
#define DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS @DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS@
|
||||||
|
|
||||||
/* Cross-compilation configuration */
|
/* Cross-compilation configuration */
|
||||||
#cmakedefine CMAKE_CROSSCOMPILING
|
#cmakedefine CMAKE_CROSSCOMPILING
|
||||||
|
|
||||||
|
|||||||
+4
-1
@@ -81,9 +81,12 @@ public:
|
|||||||
/**
|
/**
|
||||||
* @brief Try to acquire all locks in order; back off if acquisition fails
|
* @brief Try to acquire all locks in order; back off if acquisition fails
|
||||||
* @param lockvoker The LockerAndInvoker attempting to acquire the locks
|
* @param lockvoker The LockerAndInvoker attempting to acquire the locks
|
||||||
|
* @param firstFailedQutex Output parameter to receive the first Qutex that
|
||||||
|
* failed acquisition (can be nullptr)
|
||||||
* @return true if all locks were acquired, false otherwise
|
* @return true if all locks were acquired, false otherwise
|
||||||
*/
|
*/
|
||||||
bool tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker);
|
bool tryAcquireOrBackOff(
|
||||||
|
LockerAndInvokerBase &lockvoker, Qutex *firstFailedQutex = nullptr);
|
||||||
void unregisterFromQutexQueues();
|
void unregisterFromQutexQueues();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
+3
-2
@@ -24,8 +24,8 @@ public:
|
|||||||
/**
|
/**
|
||||||
* @brief Constructor
|
* @brief Constructor
|
||||||
*/
|
*/
|
||||||
Qutex()
|
Qutex(const std::string &_name)
|
||||||
: isOwned(false)
|
: isOwned(false), name(_name)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -88,6 +88,7 @@ public:
|
|||||||
SpinLock lock;
|
SpinLock lock;
|
||||||
LockerAndInvokerBase::List queue;
|
LockerAndInvokerBase::List queue;
|
||||||
bool isOwned;
|
bool isOwned;
|
||||||
|
std::string name;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace smo
|
} // namespace smo
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
#include <componentThread.h>
|
#include <componentThread.h>
|
||||||
#include <lockSet.h>
|
#include <lockSet.h>
|
||||||
#include <asynchronousContinuation.h>
|
#include <asynchronousContinuation.h>
|
||||||
@@ -63,7 +65,8 @@ public:
|
|||||||
: LockerAndInvokerBase(&serializedContinuation),
|
: LockerAndInvokerBase(&serializedContinuation),
|
||||||
serializedContinuation(serializedContinuation),
|
serializedContinuation(serializedContinuation),
|
||||||
target(target),
|
target(target),
|
||||||
invocationTarget(std::move(invocationTarget))
|
invocationTarget(std::move(invocationTarget)),
|
||||||
|
creationTimestamp(std::chrono::steady_clock::now())
|
||||||
{
|
{
|
||||||
firstWake();
|
firstWake();
|
||||||
}
|
}
|
||||||
@@ -81,11 +84,28 @@ public:
|
|||||||
"executing on wrong ComponentThread");
|
"executing on wrong ComponentThread");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Qutex *firstFailedQutexPtr = nullptr;
|
||||||
|
bool isDeadlockLikely = isDeadlockLikely();
|
||||||
|
|
||||||
if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
|
if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
|
||||||
*this))
|
*this, (isDeadlockLikely ? &firstFailedQutexPtr : nullptr)))
|
||||||
{
|
{
|
||||||
// Just allow this lockvoker to be dropped from its io_service.
|
// Just allow this lockvoker to be dropped from its io_service.
|
||||||
allowAwakening();
|
allowAwakening();
|
||||||
|
|
||||||
|
if (!isDeadlockLikely)
|
||||||
|
{ return; }
|
||||||
|
|
||||||
|
Qutex &firstFailedQutex = *firstFailedQutexPtr;
|
||||||
|
|
||||||
|
std::cerr << __func__ << ": Deadlock likely: "
|
||||||
|
<< "Lockvoker has been waiting for "
|
||||||
|
<< std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
std::chrono::steady_clock::now() - creationTimestamp)
|
||||||
|
.count()
|
||||||
|
<< "ms, failed on qutex @" << &firstFailedQutex
|
||||||
|
<< " (" << firstFailedQutex.name << ")" << std::endl;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,9 +157,8 @@ public:
|
|||||||
target->getIoService().post(*this);
|
target->getIoService().post(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private:
|
||||||
* @brief Allow awakening by resetting the awake flag
|
// Allow awakening by resetting the awake flag
|
||||||
*/
|
|
||||||
void allowAwakening()
|
void allowAwakening()
|
||||||
{ serializedContinuation.isAwakeOrBeingAwakened.store(false); }
|
{ serializedContinuation.isAwakeOrBeingAwakened.store(false); }
|
||||||
|
|
||||||
@@ -158,11 +177,21 @@ public:
|
|||||||
awaken(true);
|
awaken(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS has elapsed since creation
|
||||||
|
bool isDeadlockLikely() const
|
||||||
|
{
|
||||||
|
auto now = std::chrono::steady_clock::now();
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
now - creationTimestamp);
|
||||||
|
return elapsed.count() >= DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SerializedAsynchronousContinuation<OriginalCbFnT>
|
SerializedAsynchronousContinuation<OriginalCbFnT>
|
||||||
&serializedContinuation;
|
&serializedContinuation;
|
||||||
InvocationTargetT invocationTarget;
|
InvocationTargetT invocationTarget;
|
||||||
std::shared_ptr<ComponentThread> target;
|
std::shared_ptr<ComponentThread> target;
|
||||||
|
std::chrono::steady_clock::time_point creationTimestamp;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
+7
-1
@@ -47,7 +47,9 @@ void LockSet<OriginalCbFnT>::registerInQutexQueues(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class OriginalCbFnT>
|
template <class OriginalCbFnT>
|
||||||
bool LockSet<OriginalCbFnT>::tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker)
|
bool LockSet<OriginalCbFnT>::tryAcquireOrBackOff(
|
||||||
|
LockerAndInvokerBase &lockvoker, Qutex *firstFailedQutex
|
||||||
|
)
|
||||||
{
|
{
|
||||||
if (!registeredInQutexQueues)
|
if (!registeredInQutexQueues)
|
||||||
{
|
{
|
||||||
@@ -72,6 +74,10 @@ bool LockSet<OriginalCbFnT>::tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker
|
|||||||
if (!lockUsageDesc.first.get().tryAcquire(
|
if (!lockUsageDesc.first.get().tryAcquire(
|
||||||
lockvoker, nRequiredLocks))
|
lockvoker, nRequiredLocks))
|
||||||
{
|
{
|
||||||
|
// Set the first failed qutex for debugging
|
||||||
|
if (firstFailedQutex) {
|
||||||
|
*firstFailedQutex = &lockUsageDesc.first.get();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user