From f8bf8083af8e3c80b8957ce7fda6cceabb077468 Mon Sep 17 00:00:00 2001 From: Hayodea Hakol Date: Sat, 27 Sep 2025 20:51:20 -0400 Subject: [PATCH] Locking: Add contin tracing to detect deadlocks We added code to SerializedAsynchronousContinuation to trace all the contins (continuations) in the caller chain of a particular Lockvoker: if the qutex that failed acquisition is found in the LockSet of one of them, a deadlock is reported. This basically ensures that we'll almost never deal with a deadlock. So cool. --- include/asynchronousContinuation.h | 6 +- include/asynchronousContinuationChainLink.h | 3 + include/serializedAsynchronousContinuation.h | 14 ++++- .../serializedAsynchronousContinuation.cpp | 58 +++++++++++++++++-- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/include/asynchronousContinuation.h b/include/asynchronousContinuation.h index cd76783..10e725f 100644 --- a/include/asynchronousContinuation.h +++ b/include/asynchronousContinuation.h @@ -38,7 +38,6 @@ public: std::shared_ptr> lifetimePreservingConveyance); - /** EXPLANATION: * When an exception is thrown in a an async callee, which pertains to an * error in the data given by the caller, we ought not to throw the @@ -67,6 +66,11 @@ public: { std::rethrow_exception(exception); } } + // Implement the virtual method from AsynchronousContinuationChainLink + virtual std::shared_ptr + getCallersContinuation() override + { return originalCallback.callerContinuation; } + public: Callback originalCallback; std::exception_ptr exception; diff --git a/include/asynchronousContinuationChainLink.h b/include/asynchronousContinuationChainLink.h index 5b5eb0c..63fb1b9 100644 --- a/include/asynchronousContinuationChainLink.h +++ b/include/asynchronousContinuationChainLink.h @@ -18,6 +18,9 @@ class AsynchronousContinuationChainLink { public: virtual ~AsynchronousContinuationChainLink() = default; + + virtual std::shared_ptr + getCallersContinuation() = 0; }; } // namespace smo diff --git a/include/serializedAsynchronousContinuation.h b/include/serializedAsynchronousContinuation.h index b019e59..48d634d 100644 --- 
a/include/serializedAsynchronousContinuation.h +++ b/include/serializedAsynchronousContinuation.h @@ -100,7 +100,14 @@ public: { return; } #ifdef CONFIG_ENABLE_DEBUG_LOCKS - handleLikelyDeadlock(*firstFailedQutexPtr); + Qutex &firstFailedQutex = *firstFailedQutexPtr; + bool isDeadlock = traceContinuationHistoryForDeadlockOn( + firstFailedQutex); + + if (!isDeadlock) + { return; } + + handleDeadlock(firstFailedQutex); #endif return; } @@ -186,12 +193,13 @@ public: #endif } +#ifdef CONFIG_ENABLE_DEBUG_LOCKS + bool traceContinuationHistoryForDeadlockOn(Qutex &firstFailedQutex); /** * @brief Handle a likely deadlock situation by logging debug information * @param firstFailedQutex The first qutex that failed acquisition */ -#ifdef CONFIG_ENABLE_DEBUG_LOCKS - void handleLikelyDeadlock(Qutex& firstFailedQutex); + void handleDeadlock(Qutex& firstFailedQutex); #endif private: diff --git a/smocore/serializedAsynchronousContinuation.cpp b/smocore/serializedAsynchronousContinuation.cpp index a5e4f56..19ffdea 100644 --- a/smocore/serializedAsynchronousContinuation.cpp +++ b/smocore/serializedAsynchronousContinuation.cpp @@ -5,22 +5,72 @@ namespace smo { #ifdef CONFIG_ENABLE_DEBUG_LOCKS + +template +template +bool +SerializedAsynchronousContinuation +::LockerAndInvoker +::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex) +{ + /** EXPLANATION: + * In this function we will trace through the chain of continuations that + * led up to this Lockvoker's continuation. For each continuation which is + * a SerializedAsynchronousContinuation, we check through its LockSet to see + * if it contains the lock that failed acquisition. If it does, we have a + * deadlock. + */ + + /* We can't start with the continuation directly referenced by this starting + * Lockvoker as it would contain all the locks we're currently trying to + * acquire...and rightly so because it's the continuation for this current + * lockvoker. 
+ */ + for (std::shared_ptr currContin = + this->serializedContinuation.getCallersContinuation(); + currContin != nullptr; + currContin = currContin->getCallersContinuation()) + { + auto serializedCont = std::dynamic_pointer_cast< + SerializedAsynchronousContinuation>(currContin); + + if (serializedCont == nullptr) { continue; } + + // Check if the firstFailedQutex is in this continuation's LockSet + try { + const auto& lockUsageDesc = serializedCont->requiredLocks + .getLockUsageDesc(firstFailedQutex); + } catch (const std::runtime_error& e) { + std::cerr << __func__ << ": " << e.what() << std::endl; + continue; + } + + std::cout << __func__ << ":Deadlock detected: Found " + << "firstFailedQutex @" << &firstFailedQutex + << " (" << firstFailedQutex.name << ") in LockSet of " + << "SerializedAsynchronousContinuation @" + << serializedCont.get() << std::endl; + + return true; + } + + return false; +} + template template void SerializedAsynchronousContinuation ::LockerAndInvoker -::handleLikelyDeadlock(Qutex& firstFailedQutex) +::handleDeadlock(Qutex& firstFailedQutex) { - std::cerr << __func__ << ": Deadlock likely: " + std::cerr << __func__ << ": Deadlock: " << "Lockvoker has been waiting for " << std::chrono::duration_cast( std::chrono::steady_clock::now() - this->creationTimestamp) .count() << "ms, failed on qutex @" << &firstFailedQutex << " (" << firstFailedQutex.name << ")" << std::endl; - - } #endif