Add timeout-based deadlock diagnostics to the qutex lock-acquisition path.

  * CMakeLists.txt, include/config.h.in: new cache variable
    DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS (default 500, rejected unless > 0),
    exported to C++ as a macro of the same name.
  * include/qutex.h: Qutex stores a name that is passed to its constructor.
  * include/lockSet.h, smocore/lockSet.cpp: tryAcquireOrBackOff() accepts an
    optional out-parameter (Qutex **) that receives the address of the first
    Qutex whose tryAcquire() failed.
  * include/serializedAsynchronousContinuation.h: each lockvoker records its
    creation time; once the timeout has elapsed, a failed acquisition attempt
    prints the blocking Qutex (address and name) to std::cerr.

NOTE(review): this patch was recovered from a copy in which line breaks,
indentation and angle-bracketed text were lost. The targets of the context
#include lines and the lists that presumably follow `template`, `LockSet` and
`std::shared_ptr` are still missing and must be restored from the target tree;
apply with whitespace tolerance. The two added #include lines are presumed to
be <chrono> and <iostream> because the added code uses std::chrono and
std::cerr -- confirm before merging.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 804e706..b9968ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,14 @@ math(EXPR MIND_VOSCILLATOR_FREQ_MS "1000 / ${MIND_VOSCILLATOR_PERIOD_MS}")
 
 # World thread configuration
 option(WORLD_USE_BODY_THREAD "Use body thread for world component instead of separate world thread" OFF)
+
+# Qutex deadlock detection configuration
+set(DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS 500 CACHE STRING
+    "Timeout in milliseconds for deadlock detection in qutex system")
+if(NOT DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS GREATER 0)
+    message(FATAL_ERROR "DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS must be a positive integer > 0")
+endif()
+
 # Test configuration
 option(ENABLE_TESTS "Enable building tests" OFF)
 
diff --git a/include/config.h.in b/include/config.h.in
index a764bea..1ab4536 100644
--- a/include/config.h.in
+++ b/include/config.h.in
@@ -12,6 +12,9 @@
 /* World thread configuration */
 #cmakedefine WORLD_USE_BODY_THREAD
 
+/* Qutex deadlock detection configuration */
+#define DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS @DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS@
+
 /* Cross-compilation configuration */
 #cmakedefine CMAKE_CROSSCOMPILING
 
diff --git a/include/lockSet.h b/include/lockSet.h
index 96aba72..f416356 100644
--- a/include/lockSet.h
+++ b/include/lockSet.h
@@ -81,9 +81,12 @@ public:
     /**
      * @brief Try to acquire all locks in order; back off if acquisition fails
      * @param lockvoker The LockerAndInvoker attempting to acquire the locks
+     * @param firstFailedQutex Output parameter to receive a pointer to the
+     *        first Qutex that failed acquisition (can be nullptr)
      * @return true if all locks were acquired, false otherwise
      */
-    bool tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker);
+    bool tryAcquireOrBackOff(
+        LockerAndInvokerBase &lockvoker, Qutex **firstFailedQutex = nullptr);
     void unregisterFromQutexQueues();
 
     /**
diff --git a/include/qutex.h b/include/qutex.h
index 18af07a..ad82c8e 100644
--- a/include/qutex.h
+++ b/include/qutex.h
@@ -24,8 +24,8 @@ public:
     /**
      * @brief Constructor
      */
-    Qutex()
-        : isOwned(false)
+    Qutex(const std::string &_name)
+        : isOwned(false), name(_name)
     {}
 
     /**
@@ -88,6 +88,7 @@ public:
     SpinLock lock;
     LockerAndInvokerBase::List queue;
     bool isOwned;
+    std::string name;
 };
 
 } // namespace smo
diff --git a/include/serializedAsynchronousContinuation.h b/include/serializedAsynchronousContinuation.h
index a8cded8..e146c9c 100644
--- a/include/serializedAsynchronousContinuation.h
+++ b/include/serializedAsynchronousContinuation.h
@@ -4,6 +4,8 @@
 #include
 #include
 #include
+#include <chrono>
+#include <iostream>
 #include
 #include
 #include
@@ -63,7 +65,8 @@ public:
             : LockerAndInvokerBase(&serializedContinuation),
             serializedContinuation(serializedContinuation),
             target(target),
-            invocationTarget(std::move(invocationTarget))
+            invocationTarget(std::move(invocationTarget)),
+            creationTimestamp(std::chrono::steady_clock::now())
         {
             firstWake();
         }
@@ -81,11 +84,28 @@ public:
                     "executing on wrong ComponentThread");
             }
 
+            Qutex *firstFailedQutexPtr = nullptr;
+            const bool deadlockLikely = isDeadlockLikely();
+
             if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
-                *this))
+                *this, (deadlockLikely ? &firstFailedQutexPtr : nullptr)))
             {
                 // Just allow this lockvoker to be dropped from its io_service.
                 allowAwakening();
+
+                if (!deadlockLikely || firstFailedQutexPtr == nullptr)
+                    { return; }
+
+                Qutex &firstFailedQutex = *firstFailedQutexPtr;
+
+                std::cerr << __func__ << ": Deadlock likely: "
+                    << "Lockvoker has been waiting for "
+                    << std::chrono::duration_cast<std::chrono::milliseconds>(
+                        std::chrono::steady_clock::now() - creationTimestamp)
+                        .count()
+                    << "ms, failed on qutex @" << &firstFailedQutex
+                    << " (" << firstFailedQutex.name << ")" << std::endl;
+
                 return;
             }
 
@@ -137,9 +157,8 @@ public:
             target->getIoService().post(*this);
         }
 
-        /**
-         * @brief Allow awakening by resetting the awake flag
-         */
+    private:
+        // Allow awakening by resetting the awake flag
         void allowAwakening()
             { serializedContinuation.isAwakeOrBeingAwakened.store(false); }
 
@@ -158,11 +177,21 @@ public:
             awaken(true);
         }
 
+        // Check if DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS has elapsed since creation
+        bool isDeadlockLikely() const
+        {
+            auto now = std::chrono::steady_clock::now();
+            auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
+                now - creationTimestamp);
+            return elapsed.count() >= DEBUG_CONFIG_QUTEX_DEADLOCK_TIMEOUT_MS;
+        }
+
     private:
         SerializedAsynchronousContinuation
             &serializedContinuation;
         InvocationTargetT invocationTarget;
         std::shared_ptr target;
+        std::chrono::steady_clock::time_point creationTimestamp;
     };
 };
 
diff --git a/smocore/lockSet.cpp b/smocore/lockSet.cpp
index 8b26f8f..f528fd9 100644
--- a/smocore/lockSet.cpp
+++ b/smocore/lockSet.cpp
@@ -47,7 +47,9 @@ void LockSet::registerInQutexQueues(
 }
 
 template
-bool LockSet::tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker)
+bool LockSet::tryAcquireOrBackOff(
+    LockerAndInvokerBase &lockvoker, Qutex **firstFailedQutex
+    )
 {
     if (!registeredInQutexQueues)
     {
@@ -72,6 +74,10 @@ bool LockSet::tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker
         if (!lockUsageDesc.first.get().tryAcquire(
             lockvoker, nRequiredLocks))
         {
+            // Report the qutex that blocked acquisition (deadlock diagnostics)
+            if (firstFailedQutex) {
+                *firstFailedQutex = &lockUsageDesc.first.get();
+            }
             break;
         }
 