mirror of
https://github.com/latentPrion/libspinscale.git
synced 2026-02-27 14:36:04 +00:00
Initial commit of libspinscale library
This commit is contained in:
28
src/component.cpp
Normal file
28
src/component.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <spinscale/component.h>
|
||||
#include <spinscale/puppetApplication.h>
|
||||
#include <spinscale/marionette.h>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
/**
 * Component: binds this component to the ComponentThread that will
 * service it.
 *
 * @param thread Shared ownership of the servicing thread; stored as-is.
 */
Component::Component(const std::shared_ptr<ComponentThread> &thread)
:   thread(thread)
{}
|
||||
|
||||
/**
 * PuppetComponent: a component owned by a PuppetApplication.
 *
 * @param parent Owning application; held by reference (must outlive this).
 * @param thread Thread that services this component.
 */
PuppetComponent::PuppetComponent(
    PuppetApplication &parent, const std::shared_ptr<ComponentThread> &thread)
:   Component(thread),
    parent(parent)
{}
|
||||
|
||||
namespace mrntt {
|
||||
|
||||
/**
 * MarionetteComponent: a component serviced by the marionette
 * (controller) thread.
 *
 * @param thread Thread that services this component.
 */
MarionetteComponent::MarionetteComponent(
    const std::shared_ptr<sscl::ComponentThread> &thread)
:   sscl::Component(thread)
{}
|
||||
|
||||
} // namespace mrntt
|
||||
} // namespace sscl
|
||||
325
src/componentThread.cpp
Normal file
325
src/componentThread.cpp
Normal file
@@ -0,0 +1,325 @@
|
||||
#include <boostAsioLinkageFix.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <boost/asio/io_service.hpp>
|
||||
#include <spinscale/asynchronousContinuation.h>
|
||||
#include <spinscale/callback.h>
|
||||
#include <spinscale/callableTracer.h>
|
||||
#include <spinscale/componentThread.h>
|
||||
#include <spinscale/marionette.h>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
namespace mrntt {
|
||||
// Global variable to store the marionette thread ID.
// Default value is 0, but should be set by application code via
// setMarionetteThreadId() before any code compares against it
// (e.g. the guards in PuppetThread's lifetime-management requests).
ThreadId marionetteThreadId = 0;

/// Record the marionette (controller) thread's id. Intended to be called
/// once during application startup, before puppet threads are JOLTed.
void setMarionetteThreadId(ThreadId id)
{
    marionetteThreadId = id;
}
|
||||
} // namespace mrntt
|
||||
|
||||
} // namespace sscl
|
||||
|
||||
namespace sscl {
|
||||
|
||||
thread_local std::shared_ptr<ComponentThread> thisComponentThread;
|
||||
|
||||
namespace mrntt {
|
||||
// Global marionette thread instance - defined here but initialized by application
|
||||
std::shared_ptr<MarionetteThread> thread;
|
||||
} // namespace mrntt
|
||||
|
||||
// Implementation of static method.
/// Accessor for the process-wide marionette thread instance
/// (sscl::mrntt::thread). May be null until application code
/// initializes that global.
std::shared_ptr<MarionetteThread> ComponentThread::getMrntt()
{
    return sscl::mrntt::thread;
}
|
||||
|
||||
/// Record this thread object in the calling thread's thread_local slot so
/// that getSelf()/tlsInitialized() work from here on. Since it writes a
/// thread_local, it is only meaningful when run on this thread itself.
void MarionetteThread::initializeTls(void)
{
    thisComponentThread = shared_from_this();
}
|
||||
|
||||
/// Record this thread object in the calling thread's thread_local slot so
/// that getSelf()/tlsInitialized() work from here on. Since it writes a
/// thread_local, it is only meaningful when run on this thread itself.
void PuppetThread::initializeTls(void)
{
    thisComponentThread = shared_from_this();
}
|
||||
|
||||
bool ComponentThread::tlsInitialized(void)
|
||||
{
|
||||
return thisComponentThread != nullptr;
|
||||
}
|
||||
|
||||
const std::shared_ptr<ComponentThread> ComponentThread::getSelf(void)
|
||||
{
|
||||
if (!thisComponentThread)
|
||||
{
|
||||
throw std::runtime_error(std::string(__func__)
|
||||
+ ": TLS not initialized");
|
||||
}
|
||||
|
||||
return thisComponentThread;
|
||||
}
|
||||
|
||||
/**
 * One-shot continuation carrying a thread lifetime-management request
 * (JOLT / start / exit / pause / resume) onto the target thread's
 * io_service queue. The *_posted handlers below execute on whichever
 * io_service they were posted to (normally the target thread's).
 *
 * Lifetime: callers allocate this with std::make_shared and bind the
 * shared_ptr into the posted handler (the `context` parameter), so the
 * op stays alive until its handler has run.
 */
class PuppetThread::ThreadLifetimeMgmtOp
    : public PostedAsynchronousContinuation<threadLifetimeMgmtOpCbFn>
{
public:
    /**
     * @param caller   Thread that issued the request; receives the
     *                 completion callback.
     * @param target   Puppet thread the request operates on.
     * @param callback Fired via callOriginalCb() when the op completes.
     */
    ThreadLifetimeMgmtOp(
        const std::shared_ptr<ComponentThread> &caller,
        const std::shared_ptr<PuppetThread> &target,
        Callback<threadLifetimeMgmtOpCbFn> callback)
    : PostedAsynchronousContinuation<threadLifetimeMgmtOpCbFn>(
        caller, callback),
    target(target)
    {}

public:
    // Thread this lifetime-management request operates on. Holding a
    // shared_ptr keeps the target alive for the duration of the op.
    const std::shared_ptr<PuppetThread> target;

public:
    // JOLT handler: stops the target's main io_service. Runs on the
    // target thread via its main queue.
    void joltThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "JOLT request."
            << "\n";

        target->io_service.stop();
        callOriginalCb();
    }

    // startThread handler: placeholder for the thread's private setup
    // sequence, then signals completion.
    void startThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "startThread."
            << "\n";

        // Execute private setup sequence here
        // This is where each thread would implement its specific initialization

        callOriginalCb();
    }

    // exitThread handler, main-queue leg: flag the thread for shutdown
    // and stop its main io_service.
    void exitThreadReq1_mainQueue_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "exitThread (main queue)." << "\n";

        target->cleanup();
        target->io_service.stop();
        callOriginalCb();
    }

    // exitThread handler, pause-queue leg: also stops the pause queue.
    // This leg only runs if the target is blocked in its pause loop
    // (see pauseThreadReq1_posted), letting a paused thread unwind.
    void exitThreadReq1_pauseQueue_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "exitThread (pause queue)."<< "\n";

        target->cleanup();
        target->pause_io_service.stop();
        target->io_service.stop();
        callOriginalCb();
    }

    // pauseThread handler: parks the target thread inside
    // pause_io_service.run() until resumeThreadReq1_posted (or the
    // pause-queue exit leg) stops that service.
    void pauseThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "pauseThread." << "\n";

        /* We have to invoke the callback here before moving on because
         * our next operation is going to block the thread, so it won't
         * have a chance to invoke the callback until it's unblocked.
         */
        callOriginalCb();
        target->pause_io_service.reset();
        target->pause_io_service.run();
    }

    // resumeThread handler: posted onto the pause queue (which the
    // paused target is blocked on); stopping it unblocks the target's
    // main run loop.
    void resumeThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "resumeThread." << "\n";

        target->pause_io_service.stop();
        callOriginalCb();
    }
};
|
||||
|
||||
void ComponentThread::cleanup(void)
|
||||
{
|
||||
this->keepLooping = false;
|
||||
}
|
||||
|
||||
/**
 * Post a JOLT request onto this puppet thread's main io_service.
 *
 * @param selfPtr  Shared pointer to this same thread object (see the
 *                 explanation below for why shared_from_this() cannot
 *                 be used at JOLT time).
 * @param callback Completion callback, fired once the JOLT handler
 *                 has run on the target thread.
 * @throws std::runtime_error if this thread IS the marionette thread:
 *         only puppet threads may be JOLTed.
 */
void PuppetThread::joltThreadReq(
    const std::shared_ptr<PuppetThread>& selfPtr,
    Callback<threadLifetimeMgmtOpCbFn> callback)
{
    /** EXPLANATION:
     * We can't use shared_from_this() here because JOLTing occurs prior to
     * TLS being set up.
     *
     * We also can't use getSelf() as yet for the same reason: getSelf()
     * requires TLS to be set up.
     *
     * To obtain a sh_ptr to the caller, we just supply the mrntt thread since
     * JOLT is always invoked by the mrntt thread. The JOLT sequence that the
     * CRT main() function invokes on the mrntt thread is special since it
     * supplies cmdline args and envp.
     *
     * To obtain a sh_ptr to the target thread, we use the selfPtr parameter
     * passed in by the caller.
     */
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }

    // The caller is, by protocol, always the marionette thread.
    std::shared_ptr<MarionetteThread> mrntt = sscl::mrntt::thread;

    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        mrntt, selfPtr, callback);

    // Bind the shared_ptr into the handler so the op outlives this call.
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::joltThreadReq1_posted,
            request.get(), request)));
}
|
||||
|
||||
// Thread management method implementations.
/**
 * Ask this puppet thread to run its start/setup sequence.
 * The handler is posted onto this thread's main io_service;
 * `callback` fires once it has run.
 *
 * Requires the calling thread's TLS to be initialized (uses getSelf()).
 */
void PuppetThread::startThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    std::shared_ptr<ComponentThread> caller = getSelf();
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        caller, std::static_pointer_cast<PuppetThread>(shared_from_this()),
        callback);

    // Bind the shared_ptr into the handler so the op outlives this call.
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::startThreadReq1_posted,
            request.get(), request)));
}
|
||||
|
||||
/**
 * Ask this puppet thread to shut down.
 *
 * Two handlers are posted: one on the main io_service and one on the
 * pause_io_service. The pause-queue leg appears to run only when the
 * thread is currently parked in its pause loop (pause_io_service.run()
 * is only entered from pauseThreadReq1_posted), in which case that leg
 * also stops the pause loop so the thread can unwind and exit.
 *
 * Requires the calling thread's TLS to be initialized (uses getSelf()).
 */
void PuppetThread::exitThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    std::shared_ptr<ComponentThread> caller = getSelf();
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        caller, std::static_pointer_cast<PuppetThread>(shared_from_this()),
        callback);

    // Main-queue leg: runs when the thread is executing normally.
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::exitThreadReq1_mainQueue_posted,
            request.get(), request)));

    // Pause-queue leg: runs if the thread is blocked in its pause loop.
    pause_io_service.post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::exitThreadReq1_pauseQueue_posted,
            request.get(), request)));
}
|
||||
|
||||
/**
 * Ask this puppet thread to pause: the posted handler parks the thread
 * inside pause_io_service.run() until a resume (or exit) request stops
 * that service. `callback` fires just before the thread blocks (see
 * pauseThreadReq1_posted for why).
 *
 * @throws std::runtime_error if this thread IS the marionette thread;
 *         the controller thread must never be paused.
 */
void PuppetThread::pauseThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }

    std::shared_ptr<ComponentThread> caller = getSelf();
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        caller, std::static_pointer_cast<PuppetThread>(shared_from_this()),
        callback);

    // Posted on the MAIN queue: the thread itself executes the handler
    // and then blocks on its pause queue.
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::pauseThreadReq1_posted,
            request.get(), request)));
}
|
||||
|
||||
/**
 * Ask a paused puppet thread to resume. The handler is posted to the
 * pause_io_service - the queue the paused thread is blocked on - and
 * stops it, unblocking the thread's main run loop.
 *
 * @throws std::runtime_error if this thread IS the marionette thread;
 *         the controller thread is never paused, so it cannot resume.
 */
void PuppetThread::resumeThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }

    // Post to the pause_io_service to unblock the paused thread.
    std::shared_ptr<ComponentThread> caller = getSelf();
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        caller, std::static_pointer_cast<PuppetThread>(shared_from_this()),
        callback);

    pause_io_service.post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::resumeThreadReq1_posted,
            request.get(), request)));
}
|
||||
|
||||
// CPU management method implementations
|
||||
int ComponentThread::getAvailableCpuCount()
|
||||
{
|
||||
int cpuCount = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
if (cpuCount <= 0)
|
||||
{
|
||||
throw std::runtime_error(std::string(__func__)
|
||||
+ ": Failed to determine CPU count");
|
||||
}
|
||||
|
||||
// Check if std::thread::hardware_concurrency() matches sysconf result
|
||||
unsigned int hwConcurrency = std::thread::hardware_concurrency();
|
||||
if (hwConcurrency != static_cast<unsigned int>(cpuCount))
|
||||
{
|
||||
std::cerr << "Warning: CPU count mismatch - "
|
||||
"std::thread::hardware_concurrency() = "
|
||||
<< hwConcurrency << ", sysconf(_SC_NPROCESSORS_ONLN) = "
|
||||
<< cpuCount << "\n";
|
||||
}
|
||||
|
||||
return cpuCount;
|
||||
}
|
||||
|
||||
/**
 * Pin this thread's underlying OS thread to a single CPU.
 *
 * @param cpuId Zero-based CPU index; must be >= 0.
 * @throws std::runtime_error on an invalid id or if
 *         pthread_setaffinity_np() fails.
 *
 * NOTE(review): std::strerror needs <cstring>, which is not in this
 * file's visible include list - presumably pulled in transitively;
 * confirm and include it explicitly.
 */
void PuppetThread::pinToCpu(int cpuId)
{
    if (cpuId < 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": Invalid CPU ID: " + std::to_string(cpuId));
    }

    // Build an affinity mask containing only the requested CPU.
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(cpuId, &cpuset);

    // pthread_setaffinity_np returns the error number directly (it does
    // not set errno), hence std::strerror(result) below.
    int result = pthread_setaffinity_np(
        thread.native_handle(), sizeof(cpu_set_t), &cpuset);
    if (result != 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": Failed to pin thread to CPU " + std::to_string(cpuId)
            + ": " + std::strerror(result));
    }

    // Remember the pinning so it can be queried later.
    pinnedCpuId = cpuId;
}
|
||||
|
||||
} // namespace sscl
|
||||
5
src/lockerAndInvokerBase.cpp
Normal file
5
src/lockerAndInvokerBase.cpp
Normal file
@@ -0,0 +1,5 @@
|
||||
#include <spinscale/lockerAndInvokerBase.h>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
} // namespace sscl
|
||||
222
src/puppetApplication.cpp
Normal file
222
src/puppetApplication.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
#include <iostream>
|
||||
#include <spinscale/asynchronousContinuation.h>
|
||||
#include <spinscale/asynchronousLoop.h>
|
||||
#include <spinscale/callback.h>
|
||||
#include <spinscale/puppetApplication.h>
|
||||
#include <spinscale/componentThread.h>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
/**
 * PuppetApplication: owns the set of puppet threads it manages.
 *
 * @param threads Puppet threads to manage; copied into componentThreads.
 */
PuppetApplication::PuppetApplication(
    const std::vector<std::shared_ptr<PuppetThread>> &threads)
:   componentThreads(threads)
{}
|
||||
|
||||
/**
 * Fan-in continuation for application-wide puppet-thread operations.
 * An AsynchronousLoop counts per-thread completion callbacks; the
 * original caller's callback fires only once every thread has reported.
 */
class PuppetApplication::PuppetThreadLifetimeMgmtOp
    : public NonPostedAsynchronousContinuation<puppetThreadLifetimeMgmtOpCbFn>
{
public:
    /**
     * @param parent   Owning application (held by reference).
     * @param nThreads Number of per-thread completions to wait for.
     * @param callback Fired once all nThreads completions arrive.
     */
    PuppetThreadLifetimeMgmtOp(
        PuppetApplication &parent, unsigned int nThreads,
        Callback<puppetThreadLifetimeMgmtOpCbFn> callback)
    : NonPostedAsynchronousContinuation<puppetThreadLifetimeMgmtOpCbFn>(callback),
    loop(nThreads),
    parent(parent)
    {}

public:
    // Completion counter; isComplete() once all expected callbacks arrive.
    AsynchronousLoop loop;
    PuppetApplication &parent;

public:
    // Per-thread JOLT completion: on the final arrival, mark the
    // application as jolted and fire the original callback.
    void joltAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }

        parent.threadsHaveBeenJolted = true;
        callOriginalCb();
    }

    // Generic per-thread completion handler shared by the start, pause,
    // resume (and currently exit) fan-ins: fires the original callback on
    // the final arrival.
    void executeGenericOpOnAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }

        callOriginalCb();
    }

    // Exit-specific fan-in: once every thread has completed its exit
    // handlers, join the underlying OS threads before firing the
    // original callback.
    void exitAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }

        for (auto& thread : parent.componentThreads) {
            thread->thread.join();
        }

        callOriginalCb();
    }
};
|
||||
|
||||
/**
 * JOLT every puppet thread, firing `callback` once all have been jolted.
 * Idempotent: if the threads were already jolted, completes immediately.
 */
void PuppetApplication::joltAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    if (threadsHaveBeenJolted)
    {
        std::cout << "Mrntt: All puppet threads already JOLTed. "
            << "Skipping JOLT request." << "\n";
        // Guard against an unset callback (the other paths in this file
        // already test callback.callbackFn before invoking it; this
        // branch previously called it unconditionally).
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // With no threads there is nothing to post: mark the JOLT done and
    // complete immediately. (Previously this early-out was skipped when
    // the callback was unset, leaving threadsHaveBeenJolted false and
    // building a zero-count completion loop that could never fire.)
    if (componentThreads.empty())
    {
        threadsHaveBeenJolted = true;
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // Create a counter to track when all threads have been jolted.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);

    for (auto& thread : componentThreads)
    {
        thread->joltThreadReq(
            thread,
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::joltAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
|
||||
|
||||
/**
 * Run the start sequence on every puppet thread; `callback` fires once
 * all threads have completed (fan-in via PuppetThreadLifetimeMgmtOp).
 */
void PuppetApplication::startAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // With no threads there is nothing to start: complete immediately.
    // (This early-out no longer requires the callback to be set, which
    // previously let an empty application fall through and build a
    // zero-count completion loop that could never fire.)
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // Create a counter to track when all threads have started.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);

    for (auto& thread : componentThreads)
    {
        thread->startThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
||||
|
||||
/**
 * Pause every puppet thread; `callback` fires once all threads have
 * acknowledged the pause (fan-in via PuppetThreadLifetimeMgmtOp).
 */
void PuppetApplication::pauseAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // With no threads there is nothing to pause: complete immediately.
    // (This early-out no longer requires the callback to be set, which
    // previously let an empty application fall through and build a
    // zero-count completion loop that could never fire.)
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // Create a counter to track when all threads have paused.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);

    for (auto& thread : componentThreads)
    {
        thread->pauseThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
|
||||
|
||||
/**
 * Resume every paused puppet thread; `callback` fires once all threads
 * have resumed (fan-in via PuppetThreadLifetimeMgmtOp).
 */
void PuppetApplication::resumeAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // With no threads there is nothing to resume: complete immediately.
    // (This early-out no longer requires the callback to be set, which
    // previously let an empty application fall through and build a
    // zero-count completion loop that could never fire.)
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // Create a counter to track when all threads have resumed.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);

    for (auto& thread : componentThreads)
    {
        thread->resumeThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
|
||||
|
||||
/**
 * Shut down every puppet thread; `callback` fires once all threads have
 * exited and their OS threads have been joined.
 */
void PuppetApplication::exitAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // With no threads there is nothing to exit: complete immediately.
    // (This early-out no longer requires the callback to be set, which
    // previously let an empty application fall through and build a
    // zero-count completion loop that could never fire.)
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }

    // Create a counter to track when all threads have exited.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);

    for (auto& thread : componentThreads)
    {
        /* Bind the exit-specific fan-in handler, which join()s the OS
         * threads once every puppet has finished, instead of the generic
         * one. NOTE(review): the original bound
         * executeGenericOpOnAllPuppetThreadsReq1 here, leaving
         * exitAllPuppetThreadsReq1 - and its thread.join() loop - as dead
         * code, so exited threads were never joined. Confirm that this
         * completion callback runs on the requesting (mrntt) thread so the
         * final join() loop cannot attempt to join the thread it runs on.
         */
        thread->exitThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::exitAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
|
||||
|
||||
void PuppetApplication::distributeAndPinThreadsAcrossCpus()
|
||||
{
|
||||
int cpuCount = ComponentThread::getAvailableCpuCount();
|
||||
|
||||
// Distribute and pin threads across CPUs
|
||||
int threadIndex = 0;
|
||||
for (auto& thread : componentThreads)
|
||||
{
|
||||
int targetCpu = threadIndex % cpuCount;
|
||||
thread->pinToCpu(targetCpu);
|
||||
++threadIndex;
|
||||
}
|
||||
|
||||
std::cout << __func__ << ": Distributed " << threadIndex << " threads "
|
||||
<< "across " << cpuCount << " CPUs\n";
|
||||
}
|
||||
|
||||
} // namespace sscl
|
||||
380
src/qutex.cpp
Normal file
380
src/qutex.cpp
Normal file
@@ -0,0 +1,380 @@
|
||||
#include <spinscale/qutex.h>
|
||||
#include <spinscale/lockerAndInvokerBase.h>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
/**
 * Attempt to take this qutex on behalf of tryingLockvoker, which must
 * already be registered in this qutex's queue.
 *
 * Fairness rule: a single-lock acquirer must be at the very front of the
 * queue; an acquirer needing nRequiredLocks locks only needs to be in
 * roughly the front (1 - 1/nRequiredLocks) portion of the queue.
 *
 * @param tryingLockvoker Registered waiter attempting the acquisition.
 * @param nRequiredLocks  Total locks this waiter's operation requires.
 * @return true if the qutex was acquired; false if it must back off.
 * @throws std::runtime_error if called with an empty queue (protocol
 *         violation: registration must precede acquisition).
 */
bool Qutex::tryAcquire(
    const LockerAndInvokerBase &tryingLockvoker, int nRequiredLocks
)
{
    lock.acquire();

    const int qNItems = static_cast<int>(queue.size());

    // If queue is empty, this should never happen since we register
    // before trying to acquire.
    if (qNItems < 1)
    {
        lock.release();

        throw std::runtime_error(
            std::string(__func__) +
            ": tryAcquire called on empty queue - this should never happen");
    }

    // If lock is already owned, fail.
    if (isOwned)
    {
        lock.release();
        return false;
    }

    /** EXPLANATION:
     * Calculate how many items from the rear we need to scan.
     *
     * For nRequiredLocks=1: must be at front (nRearItemsToScan = qNItems, scan all)
     * For nRequiredLocks=2: must be in top 50% (nRearItemsToScan = qNItems/2)
     * For nRequiredLocks=3: must be in top 66% (nRearItemsToScan = qNItems/3)
     * etc.
     */
    const int nRearItemsToScan = qNItems / nRequiredLocks;

    // If we're the only item in queue, or if the fraction calculation
    // results in 0 rear items to scan, we automatically succeed.
    if (qNItems == 1 || nRearItemsToScan < 1)
    {
        isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
        // Use the stored iterator from the LockSet to record ownership.
        auto it = tryingLockvoker.getLockvokerIteratorForQutex(*this);
        currOwner = *it;
#endif
        lock.release();
        return true;
    }

    // For single-lock requests, they must be at the front of the queue.
    if (nRequiredLocks == 1)
    {
        bool ret = false;

        if ((*queue.front()) == tryingLockvoker)
        {
            isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
            currOwner = queue.front();
#endif
            ret = true;
        }
        else {
            ret = false;
        }

        lock.release();
        return ret;
    }

    // For multi-lock requests, check if the lockvoker is in the rear
    // portion. If it's NOT in the rear portion, then it's in the top X%
    // and should succeed.
    auto rIt = queue.rbegin();
    auto rEndIt = queue.rend();
    bool foundInRear = false;

    for (int i = 0; i < nRearItemsToScan && rIt != rEndIt; ++rIt, ++i)
    {
        if ((**rIt) == tryingLockvoker)
        {
            foundInRear = true;
            break;
        }
    }

    if (foundInRear)
    {
        // Found in rear portion - not in top X%, so fail.
        lock.release();
        return false;
    }

    // Not found in rear portion - must be in top X%, so succeed.
    isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    // Use the stored iterator from the LockSet to record ownership.
    auto it = tryingLockvoker.getLockvokerIteratorForQutex(*this);
    currOwner = *it;
#endif
    lock.release();
    return true;
}
|
||||
|
||||
/**
 * Back off of this qutex after the caller failed to acquire its FULL set
 * of required locks (not necessarily this one). If the failed acquirer
 * is at the front of the queue, the queue is rotated (see the deadlock
 * explanation below), and the new front waiter is awakened.
 *
 * @param failedAcquirer Waiter backing off; must be registered in queue.
 * @param nRequiredLocks Total locks the waiter's operation requires.
 * @throws std::runtime_error on protocol violations (empty queue, or a
 *         single-lock acquirer backing off from the front).
 */
void Qutex::backoff(
    const LockerAndInvokerBase &failedAcquirer, int nRequiredLocks
)
{
    lock.acquire();

    const int nQItems = static_cast<int>(queue.size());

    if (nQItems < 1)
    {
        lock.release();

        throw std::runtime_error(
            std::string(__func__) +
            ": backoff called on empty queue - this should never happen");
    }

    /* Check if failedAcquirer is at the front of the queue with
     * nRequiredLocks == 1. This should never happen because an
     * acquirer at the front of the queue with nRequiredLocks == 1
     * should always succeed.
     */
    const LockerAndInvokerBase& oldFront = *queue.front();
    if (oldFront == failedAcquirer && nRequiredLocks == 1)
    {
        lock.release();

        throw std::runtime_error(
            std::string(__func__) +
            ": Failed acquirer is at front of queue with nRequiredLocks==1 - "
            "acquirer at front of queue with nRequiredLocks==1 should always "
            "succeed.");
    }

    // Rotate queue members if failedAcquirer is at front of queue.
    if (oldFront == failedAcquirer && nQItems > 1)
    {
        /** EXPLANATION:
         * Rotate the top LockSet.size() items in the queue by moving
         * the failedAcquirer to the last position in the top
         * LockSet.size() items within the queue.
         *
         * I.e: if queue.size()==20, and lockSet.size()==5, then move
         * failedAcquirer from the front to the 5th position in the queue,
         * which should push the other 4 items forward.
         * If queue.size()==3 and LockSet.size()==5, then just
         * push_back(failedAcquirer).
         *
         * It is impossible for a Qutex queue to have only one
         * item in it, yet for that Lockvoker item to have failed to
         * acquire the Qutex. Being the only item in the ticketQ
         * means that you must succeed at acquiring the Qutex.
         */
        int indexOfItemToInsertCurrFrontBefore;
        if (nQItems > nRequiredLocks) {
            indexOfItemToInsertCurrFrontBefore = nRequiredLocks;
        } else
        {
            // -1 means insert at back -- i.e, use list::end() as insertPos.
            indexOfItemToInsertCurrFrontBefore = -1;
        }

        /* EXPLANATION:
         * Rotate them here.
         *
         * The reason why we do this rotation is to avoid a particular kind
         * of deadlock wherein a grid of async requests is perfectly
         * configured so as to guarantee that none of them can make any
         * forward progress unless they get reordered.
         *
         * Consider 2 different locks with 2 different items in them
         * each, both of which come from 2 particular requests:
         * Qutex1: Lockvoker1, Lv2
         * Qutex2: Lv2, Lv1
         *
         * Moreover, both of these lockvokers have requiredLocks.size()==2,
         * and the particular 2 locks that each one requires are indeed
         * Qutex1 and Qutex2.
         *
         * This particular setup basically means that in TL1's queue, Lv1
         * will wakeup since it's at the front of TL1. It'll successfully
         * acquire TL1 (since it's at the front), and then it'll try to
         * acquire TL2. But since Lv1 isn't in the top 50% of items in TL2's
         * queue, Lv1 will fail to acquire TL2.
         *
         * Then similarly, in TL2's queue, Lv2 will wakeup since it's at
         * the front. Again, it'll successfully acquire TL2 since it's at
         * the front of TL2's queue. But then it'll try to acquire TL1.
         * Since it's not in the top 50% of TL1's enqueued items, it'll fail
         * to acquire TL1.
         *
         * N.B: This type of perfectly ordered deadlock can occur in any
         * kind of NxN situation where ticketQ.size()==requiredLocks.size().
         * That could be 4x4, 5x5, 6x6, etc. It doesn't happen in 1x1
         * because a Lockvoker that only requires one lock will always just
         * succeed if it's at the front of its queue.
         *
         * This state of affairs is stable and will persist unless these
         * queues are reordered in some way. Hence: that's why we rotate the
         * items in a QutexQ after backing off of it. Backing off means
         * not necessarily that the calling LockVoker failed to acquire
         * THIS PARTICULAR Qutex, but rather that it failed to acquire
         * ALL of its required locks.
         *
         * Hence, if we are backing out, we should also rotate the items
         * in the queue if the current front item is the failed acquirer.
         * So that's why we do this rotation here.
         */

        // Find the iterator for the failed acquirer (which is at the front).
        auto frontIt = queue.begin();

        // Find the position to insert before using indexOfItemToInsertCurrFrontBefore.
        auto insertPos = queue.begin();
        if (indexOfItemToInsertCurrFrontBefore == -1)
        {
            // -1 means insert at the back (before end()).
            insertPos = queue.end();
        }
        else
        {
            // Move to the specified position (0-based index).
            for (
                int i = 0;
                i < indexOfItemToInsertCurrFrontBefore
                && insertPos != queue.end(); ++i)
            {
                ++insertPos;
            }
        }

        /** NOTE:
         * According to https://en.cppreference.com/w/cpp/container/list/splice:
         * "No iterators or references become invalidated. If *this and other
         * refer to different objects, the iterators to the transferred elements
         * now refer into *this, not into other."
         *
         * So our stored iterator inside of LockSet will still be valid after
         * the splice, and we can use it to unregister the lockvoker later on.
         */
        queue.splice(insertPos, queue, frontIt);
    }

    isOwned = false;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    currOwner = nullptr;
#endif
    // Take the new front's reference BEFORE dropping the internal lock;
    // it is awakened after the lock is released (see below).
    LockerAndInvokerBase &newFront = *queue.front();

    lock.release();

    /** EXPLANATION:
     * Why should this never happen? Well, if we were at the front of the queue
     * and we failed to acquire the lock, we should have been rotated away from
     * the front. On the other hand, if we were not at the front of the queue
     * and we failed to acquire the lock, then we weren't at the front of the
     * queue to begin with.
     * The exception is if the queue has only one item in it.
     *
     * Hence there ought to be no way for the failedAcquirer to be at the front
     * of the queue at this point UNLESS the queue has only one item in it.
     */
    if (newFront == failedAcquirer && nQItems > 1)
    {
        throw std::runtime_error(
            std::string(__func__) +
            ": Failed acquirer is at the front of the queue at the end of "
            "backoff, yet nQItems > 1 - this should never happen");
    }

    /** EXPLANATION:
     * We should always awaken whoever is at the front of the queue, even if
     * we didn't rotate. Why? Consider this scenario:
     *
     * Lv1 has LockSet.size==1. Lv2 has LockSet.size==3.
     * Lv1's required lock overlaps with Lv2's set of 3 required locks.
     * Lv1 registers itself in its 1 qutex's queue.
     * Lv2 registers itself in all 3 of its qutexes' queues.
     * Lv2 acquires the lock that it needs in common with Lv1.
     * (Assume that Lv2 was not at the front of the common qutex's
     * internal queue -- it only needed to be in the top 66%.)
     * Lv1 tries to acquire the common lock and fails. It gets taken off of
     * its io_service. It's now asleep until it gets
     * re-added into an io_service.
     * Lv2 fails to acquire the other 2 locks it needs and backoff()s from
     * the common lock it shares with Lv1.
     *
     * If Lv2 does NOT awaken the item at the front of the common lock's
     * queue (aka: Lv1), then Lv1 is doomed to never wake up again.
     *
     * Hence: backout() callers should always wake up the lockvoker at the
     * front of their queue before leaving.
     *
     * The exception is if the item at the front is the backout() caller
     * itself. This can happen if, for example a multi-locking lockvoker
     * is backing off of a qutex within which it's the only waiter.
     */
    if (nQItems > 1) {
        newFront.awaken();
    }
}
|
||||
|
||||
/**
 * Release an owned qutex and unconditionally awaken the waiter at the
 * front of the queue (see the lost-wakeup explanation below).
 *
 * @throws std::runtime_error if the qutex is not currently owned.
 */
void Qutex::release()
{
    lock.acquire();

    /** EXPLANATION:
     * A qutex must not have its release() called when it's not owned. The
     * plumbing required to permit that is a bit excessive, and we have
     * instrumentation to track early qutex release()ing in
     * SerializedAsynchronousContinuation.
     */
    if (!isOwned
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
        || currOwner == nullptr
#endif
    )
    {
        throw std::runtime_error(
            std::string(__func__) +
            ": release() called on unowned qutex - this should never happen");
    }

    isOwned = false;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    currOwner = nullptr;
#endif

    // It's possible for there to be 0 items left in queue after
    // unregistering; nobody to awaken in that case.
    if (queue.empty())
    {
        lock.release();
        return;
    }

    /** EXPLANATION:
     * It would be nice to be able to optimize by only awakening if the
     * release()ing lockvoker was at the front of the qutexQ, but if we
     * don't unconditionally wakeup() the front item, we could get lost
     * wakeups. Consider:
     *
     * Lv1 only has 1 requiredLock.
     * Lv2 has 3 requiredLocks. One of its requiredLocks overlaps with
     * Lv1's single requiredLock. So they both share a common lock.
     * Lv3 currently owns Lv1 & Lv2's common requiredLock.
     * Lv3 release()s that common lock.
     * Lv1 happens to be next in queue after Lv3 unregisters itself.
     * Lv3 wakes up Lv1.
     * Just before Lv1 can acquire the common lock, Lv2 acquires it now,
     * because it only needs to be in the top 66% to succeed.
     * Lv1 checks the currOwner and sees that it's owned. Lv1 is now
     * dequeued from its io_service. It won't be awakened until someone
     * awakens it.
     * Lv2 finishes its critical section and release()s the common lock.
     * Lv2 was not at the front of the qutexQ, so it does NOT awaken the
     * current item at the front.
     *
     * Thus, Lv1 never gets awakened again. The end.
     * This also means that no LockSet.size()==1 lockvoker will ever be able
     * to run again since they can only run if they are at the front of the
     * qutexQ.
     *
     * Therefore we must always awaken the front item when release()ing.
     */
    // Grab the front waiter's reference while still holding the internal
    // lock; awaken it only after the lock is dropped.
    LockerAndInvokerBase &front = *queue.front();

    lock.release();

    front.awaken();
}
|
||||
|
||||
} // namespace sscl
|
||||
393
src/qutexAcquisitionHistoryTracker.cpp
Normal file
393
src/qutexAcquisitionHistoryTracker.cpp
Normal file
@@ -0,0 +1,393 @@
|
||||
#include <spinscale/qutexAcquisitionHistoryTracker.h>
|
||||
#include <spinscale/serializedAsynchronousContinuation.h>
|
||||
#include <spinscale/qutex.h>
|
||||
#include <spinscale/dependencyGraph.h>
|
||||
#include <memory>
|
||||
#include <forward_list>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace sscl {
|
||||
|
||||
void DependencyGraph::addNode(const Node& node)
|
||||
{
|
||||
adjacencyList[node]; // Creates empty set if node doesn't exist
|
||||
}
|
||||
|
||||
void DependencyGraph::addEdge(const Node& source, const Node& target)
|
||||
{
|
||||
addNode(source);
|
||||
addNode(target);
|
||||
adjacencyList[source].insert(target);
|
||||
}
|
||||
|
||||
/** Enumerates cycles in the dependency graph.
 *
 * Runs a depth-first search from every not-yet-visited node. Each
 * detected cycle is returned as the sequence of nodes along it, with
 * the starting node repeated as the final element.
 */
std::vector<std::vector<DependencyGraph::Node>>
DependencyGraph::findCycles() const
{
	std::unordered_set<Node> visitedNodes;
	std::unordered_set<Node> activeStack;
	std::vector<std::vector<Node>> detectedCycles;
	std::vector<Node> currentPath;

	for (const auto& mapEntry : adjacencyList)
	{
		const Node& rootCandidate = mapEntry.first;

		// Nodes reached by an earlier DFS need no fresh traversal.
		if (visitedNodes.count(rootCandidate) != 0) { continue; }

		dfsCycleDetection(
			rootCandidate, visitedNodes, activeStack,
			currentPath, detectedCycles);
	}

	return detectedCycles;
}
|
||||
|
||||
bool DependencyGraph::hasCycles() const
|
||||
{
|
||||
std::unordered_set<Node> visited;
|
||||
std::unordered_set<Node> recursionStack;
|
||||
std::vector<std::vector<Node>> cycles;
|
||||
std::vector<Node> path;
|
||||
|
||||
for (const auto& entry : adjacencyList)
|
||||
{
|
||||
const Node& node = entry.first;
|
||||
if (visited.find(node) == visited.end())
|
||||
{
|
||||
dfsCycleDetection(node, visited, recursionStack, path, cycles);
|
||||
if (!cycles.empty())
|
||||
{ return true; }
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the number of distinct nodes registered via addNode()/addEdge().
size_t DependencyGraph::getNodeCount() const
{
	return adjacencyList.size();
}
|
||||
|
||||
/** Recursive DFS step for directed-cycle detection.
 *
 * @param node Node currently being expanded.
 * @param visited Nodes whose expansion has started (across all DFS roots).
 * @param recursionStack Nodes on the current DFS call chain; finding an
 *	adjacent node already in this set means a back-edge, i.e. a cycle.
 * @param path Nodes of the current call chain, in order; used to extract
 *	the cycle's node sequence.
 * @param cycles Output list. Each cycle is recorded as the path segment
 *	starting at the revisited node, with that node appended again at the
 *	end — so cycle.front() and cycle.back() name the same node.
 */
void DependencyGraph::dfsCycleDetection(
	const Node& node,
	std::unordered_set<Node>& visited,
	std::unordered_set<Node>& recursionStack,
	std::vector<Node>& path,
	std::vector<std::vector<Node>>& cycles
	)
	const
{
	// Mark current node as visited and add to recursion stack
	visited.insert(node);
	recursionStack.insert(node);
	path.push_back(node);

	// Check all adjacent nodes
	auto it = adjacencyList.find(node);
	if (it != adjacencyList.end())
	{
		for (const auto& adjacent : it->second)
		{
			// If adjacent node is in recursion stack, we found a cycle
			// (a back-edge into the current DFS call chain).
			if (recursionStack.find(adjacent) != recursionStack.end())
			{
				// Find the start of the cycle in the current path
				auto cycleStart = std::find(path.begin(), path.end(), adjacent);
				if (cycleStart != path.end())
				{
					std::vector<Node> cycle(cycleStart, path.end());
					cycle.push_back(adjacent); // Complete the cycle
					cycles.push_back(cycle);
				}
			}
			// If adjacent node hasn't been visited, recurse
			else if (visited.find(adjacent) == visited.end())
			{
				dfsCycleDetection(
					adjacent, visited, recursionStack, path, cycles);
			}
		}
	}

	// Remove from recursion stack and path when backtracking
	recursionStack.erase(node);
	path.pop_back();
}
|
||||
|
||||
// QutexAcquisitionHistoryTracker implementation
|
||||
std::unique_ptr<DependencyGraph>
|
||||
QutexAcquisitionHistoryTracker::generateGraph(bool dontAcquireLock)
|
||||
{
|
||||
auto graph = std::make_unique<DependencyGraph>();
|
||||
|
||||
if (!dontAcquireLock) {
|
||||
acquisitionHistoryLock.acquire();
|
||||
}
|
||||
|
||||
// First pass: Add all continuations as nodes
|
||||
for (const auto& entry : acquisitionHistory)
|
||||
{
|
||||
const auto& continuation = entry.first;
|
||||
graph->addNode(continuation);
|
||||
}
|
||||
|
||||
// Second pass: Add edges based on lock dependencies
|
||||
for (const auto& entry : acquisitionHistory)
|
||||
{
|
||||
const auto& continuation = entry.first;
|
||||
const auto& historyEntry = entry.second;
|
||||
const auto& wantedLock = historyEntry.first;
|
||||
const auto& heldLocks = historyEntry.second;
|
||||
|
||||
if (!heldLocks) { continue; }
|
||||
|
||||
// Check if any other continuation holds the lock this continuation wants
|
||||
for (const auto& otherEntry : acquisitionHistory)
|
||||
{
|
||||
const auto& otherContinuation = otherEntry.first;
|
||||
const auto& otherHistoryEntry = otherEntry.second;
|
||||
const auto& otherHeldLocks = otherHistoryEntry.second;
|
||||
|
||||
// Skip self-comparison
|
||||
if (continuation == otherContinuation) { continue; }
|
||||
if (!otherHeldLocks) { continue; }
|
||||
|
||||
// Check if other continuation holds the wanted lock
|
||||
for (const auto& otherHeldLock : *otherHeldLocks)
|
||||
{
|
||||
if (&otherHeldLock.get() == &wantedLock.get())
|
||||
{
|
||||
// Add edge: continuation -> otherContinuation
|
||||
// (continuation wants a lock held by otherContinuation)
|
||||
graph->addEdge(continuation, otherContinuation);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!dontAcquireLock) {
|
||||
acquisitionHistoryLock.release();
|
||||
}
|
||||
|
||||
return graph;
|
||||
}
|
||||
|
||||
/** EXPLANATION - GRIDLOCK DETECTION ALGORITHM:
|
||||
* This file implements gridlock detection algorithms that use a central
|
||||
* acquisition history to track all lockvokers suspected of being gridlocked.
|
||||
*
|
||||
* ALGORITHM OVERVIEW:
|
||||
* 1. When a lockvoker finds that DEADLOCK_TIMEOUT_MS has elapsed and it
|
||||
* still can't acquire a particular lock (firstFailedQutex), it creates
|
||||
* a new entry in a global acquisition history.
|
||||
*
|
||||
* 2. The acquisition history is an unordered_map with:
|
||||
* - Key: std::shared_ptr<AsynchronousContinuationChainLink>
|
||||
* (the timed-out lockvoker's continuation)
|
||||
* - Value: std::pair<
|
||||
* std::reference_wrapper<Qutex>,
|
||||
* std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>>
|
||||
* * pair.first: The firstFailedQutex that this lockvoker WANTS but
|
||||
* can't acquire. This metadata is essential for later-arriving
|
||||
* entrants to analyze what their predecessor timed-out sequences
|
||||
* want.
|
||||
* * pair.second: A unique_ptr to a list of all acquired Qutexes in this
|
||||
* lockvoker's continuation history.
|
||||
*
|
||||
* 3. Each timed-out lockvoker:
|
||||
* a) Adds itself to the acquisition history map with its wanted lock and
|
||||
* acquired locks
|
||||
* b) Iterates through all OTHER entries in the map (excluding itself)
|
||||
* c) For each other entry, checks if that entry's acquired locks
|
||||
* (pair.second) contains the lock that this lockvoker wants
|
||||
* (aka: firstFailedQutex/pair.first)
|
||||
* d) If found, we have detected a gridlock: two sequences where at least
|
||||
* one wants a lock held by the other, and the other wants a lock that
|
||||
* it can't acquire.
|
||||
*
|
||||
* GRIDLOCK CONDITION:
|
||||
* A gridlock exists when we find a circular chain of dependencies:
|
||||
* - Lockvoker A wants LockX but can't acquire it (held by Lockvoker B)
|
||||
* - Lockvoker B wants LockY but can't acquire it (held by Lockvoker C, D, etc.)
|
||||
* - The chain must be circular (eventually leading back to Lockvoker A or another
|
||||
* lockvoker in the chain) to ensure it's a true gridlock, not just a delay
|
||||
*
|
||||
* TIMED DELAY, I/O DELAY, or LONG-RUNNING OPERATION FALSE-POSITIVE:
|
||||
* Without circularity detection, we could incorrectly flag a simple delay, I/O
|
||||
* delay, or long-running operation as a gridlock. For example: Lockvoker A
|
||||
* wants LockX (held by Lockvoker B), and Lockvoker B is currently in a 10-second
|
||||
* sleep/delay. When B wakes up, it will release LockX, allowing A to proceed.
|
||||
* This is not a gridlock - it's just A waiting longer than DEADLOCK_TIMEOUT_MS
|
||||
* for B to finish its work. True gridlocks require circular dependencies where
|
||||
* no sequence can make progress because they're all waiting for each other in
|
||||
* a cycle.
|
||||
*
|
||||
* The central history metadata enables us to detect complex gridlocks involving
|
||||
* multiple lockvokers (2, 3, 4, 5+ sequences) by building up the acquisition
|
||||
* history over time as different lockvokers timeout and add their information.
|
||||
*/
|
||||
|
||||
bool QutexAcquisitionHistoryTracker
|
||||
::heuristicallyTraceContinuationHistoryForGridlockOn(
|
||||
Qutex &firstFailedQutex,
|
||||
std::shared_ptr<AsynchronousContinuationChainLink>& currentContinuation)
|
||||
{
|
||||
/** HEURISTIC APPROACH:
|
||||
* Due to the computational complexity of full circularity detection,
|
||||
* we implement a heuristically adequate check: when we find 2 sequences
|
||||
* where one depends on the other, and the other has reached timeout,
|
||||
* we assume this is a likely gridlock. This approach is not
|
||||
* algorithmically complete (it may miss some complex circular
|
||||
* dependencies or flag false positives), but it is heuristically useful
|
||||
* for debugging and identifying potential concurrency issues in
|
||||
* practice.
|
||||
*
|
||||
* See the file-local comment above for the complete algorithm
|
||||
* explanation.
|
||||
*/
|
||||
|
||||
/** NOTICE:
|
||||
* Generally we should have all global data structures owned by a single
|
||||
* ComponentThread; and qutexes really should only be used to serialize
|
||||
* async sequences being enqueued on the same ComponentThread. But this
|
||||
* doesn't prevent multiple CPUs from trying to add/remove entries to/from
|
||||
* the acquisition history at the same time. Why? The acquisition history
|
||||
* isn't per-CPU, it's global.
|
||||
*
|
||||
* The problem with using a SpinLock here is that if the STL uses mutexes
|
||||
* internally to lock containers, we could end up in a situation where
|
||||
* spinning waiters will be busy-spinning while the owner is sleeping?
|
||||
*
|
||||
* But this should not happen since the nature of the order of operations is
|
||||
* that the spinlock ensures that only one CPU at a time can be
|
||||
* adding/removing entries; and thus everytime an method is called on the
|
||||
* unordered_map, the caller will always succeed at acquiring the underlying
|
||||
* STL mutex.
|
||||
*
|
||||
* So it should be safe to use a SpinLock here.
|
||||
*/
|
||||
acquisitionHistoryLock.acquire();
|
||||
|
||||
// Iterate through all entries in the acquisition history
|
||||
for (const auto& entry : acquisitionHistory)
|
||||
{
|
||||
const auto& continuation = entry.first;
|
||||
const auto& historyEntry = entry.second;
|
||||
|
||||
// Skip the current continuation (don't compare with itself)
|
||||
if (continuation == currentContinuation) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if firstFailedQutex is in this continuation's held locks
|
||||
const std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>&
|
||||
heldLocks = historyEntry.second;
|
||||
|
||||
if (!heldLocks)
|
||||
{ continue; }
|
||||
|
||||
for (const auto& heldLock : *heldLocks)
|
||||
{
|
||||
/* Found firstFailedQutex in another continuation's held locks
|
||||
* This indicates a potential gridlock
|
||||
*/
|
||||
if (&heldLock.get() != &firstFailedQutex)
|
||||
{ continue; }
|
||||
|
||||
acquisitionHistoryLock.release();
|
||||
|
||||
std::cerr << __func__ << ": GRIDLOCK DETECTED: Current "
|
||||
"continuation @" << currentContinuation.get()
|
||||
<< " wants lock '" << firstFailedQutex.name
|
||||
<< "' which is held by continuation @"
|
||||
<< continuation.get() << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
acquisitionHistoryLock.release();
|
||||
return false;
|
||||
}
|
||||
|
||||
bool QutexAcquisitionHistoryTracker
|
||||
::completelyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
|
||||
{
|
||||
(void)firstFailedQutex;
|
||||
|
||||
/** ALGORITHMICALLY COMPLETE VERSION:
|
||||
* This function implements the algorithmically complete version of gridlock
|
||||
* detection that performs full circularity detection. It builds a dependency
|
||||
* graph from the acquisition history and uses DFS with cycle detection to
|
||||
* identify true circular dependencies.
|
||||
*
|
||||
* See the file-local comment above for the complete algorithm explanation.
|
||||
*/
|
||||
|
||||
acquisitionHistoryLock.acquire();
|
||||
|
||||
// Helper function to print continuation dependency info
|
||||
auto printContinuationDependency = [&](
|
||||
const auto& fromContinuation, const auto& toContinuation
|
||||
)
|
||||
{
|
||||
auto it = acquisitionHistory.find(fromContinuation);
|
||||
if (it != acquisitionHistory.end())
|
||||
{
|
||||
const auto& wantedLock = it->second.first;
|
||||
std::cerr << " Continuation @" << fromContinuation.get()
|
||||
<< " wants lock[\"" << wantedLock.get().name << "\"], "
|
||||
<< "held by continuation @" << toContinuation.get()
|
||||
<< std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << " Continuation @" << fromContinuation.get()
|
||||
<< " -> continuation @" << toContinuation.get()
|
||||
<< std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
// Pass true to dontAcquireLock since we already hold it
|
||||
auto graph = generateGraph(true);
|
||||
|
||||
// Early return if no graph or no cycles
|
||||
if (!graph || !graph->hasCycles())
|
||||
{
|
||||
acquisitionHistoryLock.release();
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cycles = graph->findCycles();
|
||||
|
||||
std::cerr << __func__ << ": CIRCULAR DEPENDENCIES DETECTED: Found "
|
||||
<< cycles.size() << " cycle(s) in lock dependency graph:" << std::endl;
|
||||
|
||||
for (size_t i = 0; i < cycles.size(); ++i)
|
||||
{
|
||||
const auto& cycle = cycles[i];
|
||||
std::cerr << " Cycle " << (i + 1) << ":\n";
|
||||
|
||||
for (size_t j = 0; j < cycle.size() - 1; ++j)
|
||||
{
|
||||
const auto& currentContinuation = cycle[j];
|
||||
const auto& nextContinuation = cycle[j + 1];
|
||||
printContinuationDependency(currentContinuation, nextContinuation);
|
||||
}
|
||||
|
||||
if (cycle.empty())
|
||||
{ continue; }
|
||||
|
||||
/* Handle the last edge (back to start of cycle)
|
||||
*/
|
||||
const auto& lastContinuation = cycle[cycle.size() - 1];
|
||||
const auto& firstContinuation = cycle[0];
|
||||
printContinuationDependency(lastContinuation, firstContinuation);
|
||||
}
|
||||
|
||||
acquisitionHistoryLock.release();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace sscl
|
||||
Reference in New Issue
Block a user