Initial commit of libspinscale library

This commit is contained in:
2025-12-28 03:54:22 -04:00
parent 6dbab54f50
commit 3f3ff1283f
29 changed files with 3875 additions and 0 deletions

28
src/component.cpp Normal file
View File

@@ -0,0 +1,28 @@
#include <spinscale/component.h>
#include <spinscale/puppetApplication.h>
#include <spinscale/marionette.h>
namespace sscl {
/// Base component: binds this component to the ComponentThread that owns it.
/// @param thread  Thread this component lives on; stored by shared ownership.
Component::Component(const std::shared_ptr<ComponentThread> &thread)
: thread(thread)
{
}
/// Puppet-side component: like Component, but also keeps a back-reference to
/// the owning PuppetApplication.
/// @param parent  Owning application (must outlive this component).
/// @param thread  Thread this component lives on.
PuppetComponent::PuppetComponent(
    PuppetApplication &parent, const std::shared_ptr<ComponentThread> &thread)
: Component(thread),
parent(parent)
{
}
namespace mrntt {
/// Marionette (control-thread) component: a plain Component bound to the
/// marionette's ComponentThread.
MarionetteComponent::MarionetteComponent(
    const std::shared_ptr<sscl::ComponentThread> &thread)
: sscl::Component(thread)
{
}
} // namespace mrntt
} // namespace sscl

325
src/componentThread.cpp Normal file
View File

@@ -0,0 +1,325 @@
#include <boostAsioLinkageFix.h>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>
#include <boost/asio/io_service.hpp>
#include <spinscale/asynchronousContinuation.h>
#include <spinscale/callableTracer.h>
#include <spinscale/callback.h>
#include <spinscale/componentThread.h>
#include <spinscale/marionette.h>
namespace sscl {
namespace mrntt {
/* Thread id of the singleton marionette (control) thread.
 *
 * Defaults to 0; application code must publish the real id via
 * setMarionetteThreadId() before issuing puppet-thread lifetime requests,
 * since those requests compare their own id against this value to reject
 * being invoked on the marionette thread itself.
 */
ThreadId marionetteThreadId = 0;
/// Record the marionette thread's id (see marionetteThreadId above).
void setMarionetteThreadId(ThreadId id)
{
    marionetteThreadId = id;
}
} // namespace mrntt
} // namespace sscl
namespace sscl {
// Per-OS-thread back-pointer to the ComponentThread object running on this
// thread; installed by {Marionette,Puppet}Thread::initializeTls().
thread_local std::shared_ptr<ComponentThread> thisComponentThread;
namespace mrntt {
// Global marionette thread instance - defined here but initialized by
// application code.
std::shared_ptr<MarionetteThread> thread;
} // namespace mrntt
/// Accessor for the global marionette thread. May return null until the
/// application has installed the instance.
std::shared_ptr<MarionetteThread> ComponentThread::getMrntt()
{
    return sscl::mrntt::thread;
}
/// Install this marionette thread as the calling OS thread's TLS
/// back-pointer; must run on the thread itself (uses shared_from_this()).
void MarionetteThread::initializeTls(void)
{
    thisComponentThread = shared_from_this();
}
/// Install this puppet thread as the calling OS thread's TLS back-pointer;
/// must run on the thread itself (uses shared_from_this()).
void PuppetThread::initializeTls(void)
{
    thisComponentThread = shared_from_this();
}
/// Report whether the calling OS thread's TLS back-pointer has been
/// installed yet (i.e. whether initializeTls() has run on this thread).
bool ComponentThread::tlsInitialized(void)
{
    return static_cast<bool>(thisComponentThread);
}
/// Return the ComponentThread running on the calling OS thread.
/// @throws std::runtime_error if initializeTls() has not yet run here.
const std::shared_ptr<ComponentThread> ComponentThread::getSelf(void)
{
    // Happy path first: TLS already set up.
    if (thisComponentThread) {
        return thisComponentThread;
    }
    throw std::runtime_error(
        std::string(__func__) + ": TLS not initialized");
}
/** Continuation carrying one thread-lifetime request (JOLT / start / pause /
 * resume / exit) against a target PuppetThread.
 *
 * Instances are heap-allocated via make_shared by the *Req() entry points
 * and kept alive by binding the shared_ptr into the posted handler. Each
 * *_posted() member runs on the TARGET thread (on whichever of its
 * io_services is currently active), performs the state change, and then
 * completes the caller's Callback via callOriginalCb().
 */
class PuppetThread::ThreadLifetimeMgmtOp
: public PostedAsynchronousContinuation<threadLifetimeMgmtOpCbFn>
{
public:
    ThreadLifetimeMgmtOp(
        const std::shared_ptr<ComponentThread> &caller,
        const std::shared_ptr<PuppetThread> &target,
        Callback<threadLifetimeMgmtOpCbFn> callback)
    : PostedAsynchronousContinuation<threadLifetimeMgmtOpCbFn>(
        caller, callback),
    target(target)
    {}

public:
    // The thread whose lifetime this request manipulates.
    const std::shared_ptr<PuppetThread> target;

public:
    /// JOLT handler: first-ever wake-up of the target thread. Stops the main
    /// io_service so the target's JOLT run() call returns.
    void joltThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "JOLT request."
            << "\n";
        target->io_service.stop();
        callOriginalCb();
    }

    /// Start handler: runs the thread's private setup sequence (currently a
    /// placeholder) and reports completion.
    void startThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "startThread."
            << "\n";
        // Execute private setup sequence here
        // This is where each thread would implement its specific initialization
        callOriginalCb();
    }

    /// Exit handler dispatched when the target is running its MAIN queue:
    /// tear down and stop the main io_service.
    void exitThreadReq1_mainQueue_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "exitThread (main queue)." << "\n";
        target->cleanup();
        target->io_service.stop();
        callOriginalCb();
    }

    /// Exit handler dispatched when the target is parked on its PAUSE queue:
    /// stop BOTH io_services so the nested pause run() and the outer main
    /// run() both unwind.
    void exitThreadReq1_pauseQueue_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "exitThread (pause queue)."<< "\n";
        target->cleanup();
        target->pause_io_service.stop();
        target->io_service.stop();
        callOriginalCb();
    }

    /// Pause handler: parks the target thread inside pause_io_service.run()
    /// until resumeThreadReq1_posted() stops it.
    void pauseThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "pauseThread." << "\n";
        /* We have to invoke the callback here before moving on because
         * our next operation is going to block the thread, so it won't
         * have a chance to invoke the callback until it's unblocked.
         */
        callOriginalCb();
        target->pause_io_service.reset();
        target->pause_io_service.run();
    }

    /// Resume handler: posted to the PAUSE queue, so it runs inside the
    /// blocked pause run() above; stopping the pause service unblocks it.
    void resumeThreadReq1_posted(
        [[maybe_unused]] std::shared_ptr<ThreadLifetimeMgmtOp> context
    )
    {
        std::cout << __func__ << ": Thread '" << target->name << "': handling "
            "resumeThread." << "\n";
        target->pause_io_service.stop();
        callOriginalCb();
    }
};
/// Mark this thread's run loop for termination. Invoked by the exit
/// handlers just before they stop the io_service(s).
void ComponentThread::cleanup(void)
{
    this->keepLooping = false;
}
/** Post the one-time JOLT (initial wake-up) request to this puppet thread.
 *
 * @param selfPtr  shared_ptr to this same PuppetThread, supplied by the
 *                 caller (see EXPLANATION below for why shared_from_this()
 *                 cannot be used here).
 * @param callback Invoked once the target thread has handled the JOLT.
 * @throws std::runtime_error if invoked on the marionette thread itself.
 */
void PuppetThread::joltThreadReq(
    const std::shared_ptr<PuppetThread>& selfPtr,
    Callback<threadLifetimeMgmtOpCbFn> callback)
{
    /** EXPLANATION:
     * We can't use shared_from_this() here because JOLTing occurs prior to
     * TLS being set up.
     *
     * We also can't use getSelf() as yet for the same reason: getSelf()
     * requires TLS to be set up.
     *
     * To obtain a sh_ptr to the caller, we just supply the mrntt thread since
     * JOLT is always invoked by the mrntt thread. The JOLT sequence that the
     * CRT main() function invokes on the mrntt thread is special since it
     * supplies cmdline args and envp.
     *
     * To obtain a sh_ptr to the target thread, we use the selfPtr parameter
     * passed in by the caller.
     */
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }
    std::shared_ptr<MarionetteThread> mrntt = sscl::mrntt::thread;
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        mrntt, selfPtr, callback);
    // Binding the shared_ptr into the handler keeps the op alive until run.
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::joltThreadReq1_posted,
            request.get(), request)));
}
// Thread management method implementations
/** Ask this puppet thread to run its start-up sequence.
 * The request is posted onto the target's main queue; @p callback fires
 * once startThreadReq1_posted() has run there.
 * @throws std::runtime_error (via getSelf) if caller TLS is uninitialized.
 */
void PuppetThread::startThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    // Resolve the caller first: getSelf() throws if TLS isn't set up yet.
    const std::shared_ptr<ComponentThread> caller = getSelf();
    const auto self =
        std::static_pointer_cast<PuppetThread>(shared_from_this());
    const auto request =
        std::make_shared<ThreadLifetimeMgmtOp>(caller, self, callback);
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::startThreadReq1_posted,
            request.get(), request)));
}
/** Ask this puppet thread to exit.
 *
 * The request is posted to BOTH the main queue and the pause queue, because
 * the target may currently be parked inside pause_io_service.run() (see
 * pauseThreadReq1_posted): whichever io_service the thread is actually
 * running dispatches its handler and stops the loop(s).
 *
 * NOTE(review): the handler posted to the queue that is NOT running appears
 * to remain pending, and both handlers call callOriginalCb() -- confirm the
 * callback cannot fire twice if both queues are ever drained.
 */
void PuppetThread::exitThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    std::shared_ptr<ComponentThread> caller = getSelf();
    auto request = std::make_shared<ThreadLifetimeMgmtOp>(
        caller, std::static_pointer_cast<PuppetThread>(shared_from_this()),
        callback);
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::exitThreadReq1_mainQueue_posted,
            request.get(), request)));
    pause_io_service.post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::exitThreadReq1_pauseQueue_posted,
            request.get(), request)));
}
/** Ask this puppet thread to pause (park itself on its pause queue).
 * @throws std::runtime_error if invoked on the marionette thread, which
 *         must never be paused.
 */
void PuppetThread::pauseThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    // Pausing the marionette (control) thread would wedge the whole app.
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }
    const std::shared_ptr<ComponentThread> caller = getSelf();
    const auto self =
        std::static_pointer_cast<PuppetThread>(shared_from_this());
    const auto request =
        std::make_shared<ThreadLifetimeMgmtOp>(caller, self, callback);
    this->getIoService().post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::pauseThreadReq1_posted,
            request.get(), request)));
}
/** Ask a paused puppet thread to resume.
 * The request goes onto the PAUSE queue, since that is the io_service the
 * parked thread is currently running (pauseThreadReq1_posted blocks in it).
 * @throws std::runtime_error if invoked on the marionette thread.
 */
void PuppetThread::resumeThreadReq(Callback<threadLifetimeMgmtOpCbFn> callback)
{
    if (id == sscl::mrntt::marionetteThreadId)
    {
        throw std::runtime_error(std::string(__func__)
            + ": invoked on mrntt thread");
    }
    // Post to the pause_io_service to unblock the paused thread.
    const std::shared_ptr<ComponentThread> caller = getSelf();
    const auto self =
        std::static_pointer_cast<PuppetThread>(shared_from_this());
    const auto request =
        std::make_shared<ThreadLifetimeMgmtOp>(caller, self, callback);
    pause_io_service.post(
        STC(std::bind(
            &ThreadLifetimeMgmtOp::resumeThreadReq1_posted,
            request.get(), request)));
}
// CPU management method implementations
/** Query the number of CPUs currently online.
 *
 * sysconf(_SC_NPROCESSORS_ONLN) is treated as authoritative; the result is
 * cross-checked against std::thread::hardware_concurrency(), with a warning
 * on stderr if they disagree (hardware_concurrency() may legitimately
 * return 0 or differ under affinity masks -- it is only a hint).
 *
 * @return Number of online CPUs (always >= 1).
 * @throws std::runtime_error if sysconf fails or reports no CPUs.
 */
int ComponentThread::getAvailableCpuCount()
{
    // sysconf returns long; keep the full width until validated instead of
    // silently narrowing to int on assignment.
    const long onlineCpus = sysconf(_SC_NPROCESSORS_ONLN);
    if (onlineCpus <= 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": Failed to determine CPU count");
    }
    const int cpuCount = static_cast<int>(onlineCpus);
    // Check if std::thread::hardware_concurrency() matches sysconf result
    const unsigned int hwConcurrency = std::thread::hardware_concurrency();
    if (hwConcurrency != static_cast<unsigned int>(cpuCount))
    {
        std::cerr << "Warning: CPU count mismatch - "
            "std::thread::hardware_concurrency() = "
            << hwConcurrency << ", sysconf(_SC_NPROCESSORS_ONLN) = "
            << cpuCount << "\n";
    }
    return cpuCount;
}
/** Pin this puppet's underlying OS thread to a single CPU.
 *
 * @param cpuId  Zero-based CPU index to pin to.
 * @throws std::runtime_error on a negative cpuId or if
 *         pthread_setaffinity_np() fails (error text via std::strerror --
 *         NOTE(review): requires <cstring>, which this file does not
 *         include directly; confirm it arrives transitively).
 *
 * On success the chosen CPU is recorded in pinnedCpuId.
 */
void PuppetThread::pinToCpu(int cpuId)
{
    if (cpuId < 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": Invalid CPU ID: " + std::to_string(cpuId));
    }
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(cpuId, &cpuset);
    // pthread_setaffinity_np returns the error number directly (not errno).
    int result = pthread_setaffinity_np(
        thread.native_handle(), sizeof(cpu_set_t), &cpuset);
    if (result != 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": Failed to pin thread to CPU " + std::to_string(cpuId)
            + ": " + std::strerror(result));
    }
    pinnedCpuId = cpuId;
}
} // namespace sscl

View File

@@ -0,0 +1,5 @@
#include <spinscale/lockerAndInvokerBase.h>
namespace sscl {
} // namespace sscl

222
src/puppetApplication.cpp Normal file
View File

@@ -0,0 +1,222 @@
#include <iostream>
#include <spinscale/asynchronousContinuation.h>
#include <spinscale/asynchronousLoop.h>
#include <spinscale/callback.h>
#include <spinscale/puppetApplication.h>
#include <spinscale/componentThread.h>
namespace sscl {
/// Construct the application over a fixed set of puppet threads.
/// @param threads  Threads this application will manage; copied into
///                 componentThreads (shared ownership).
PuppetApplication::PuppetApplication(
    const std::vector<std::shared_ptr<PuppetThread>> &threads)
: componentThreads(threads)
{
}
/** Fan-out continuation for "do X on ALL puppet threads" requests.
 *
 * Holds an AsynchronousLoop that counts per-thread completions; the *Req1()
 * members are bound as each thread's completion callback and invoke the
 * original application-level callback once every thread has reported in.
 */
class PuppetApplication::PuppetThreadLifetimeMgmtOp
: public NonPostedAsynchronousContinuation<puppetThreadLifetimeMgmtOpCbFn>
{
public:
    PuppetThreadLifetimeMgmtOp(
        PuppetApplication &parent, unsigned int nThreads,
        Callback<puppetThreadLifetimeMgmtOpCbFn> callback)
    : NonPostedAsynchronousContinuation<puppetThreadLifetimeMgmtOpCbFn>(callback),
    loop(nThreads),
    parent(parent)
    {}

public:
    AsynchronousLoop loop;      // completion counter: one tick per thread
    PuppetApplication &parent;  // owning application (outlives this op)

public:
    /// Per-thread JOLT completion; the last one in marks the whole
    /// application as JOLTed before notifying the caller.
    void joltAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }
        parent.threadsHaveBeenJolted = true;
        callOriginalCb();
    }

    /// Shared per-thread completion for the start/pause/resume fan-outs,
    /// which need no extra bookkeeping beyond the count.
    void executeGenericOpOnAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }
        callOriginalCb();
    }

    /// Per-thread exit completion; the last one in join()s every underlying
    /// std::thread before notifying the caller.
    void exitAllPuppetThreadsReq1(
        [[maybe_unused]] std::shared_ptr<PuppetThreadLifetimeMgmtOp> context
    )
    {
        loop.incrementSuccessOrFailureDueTo(true);
        if (!loop.isComplete()) {
            return;
        }
        for (auto& thread : parent.componentThreads) {
            thread->thread.join();
        }
        callOriginalCb();
    }
};
/** JOLT (first wake-up) every puppet thread; idempotent per application.
 * @param callback  Invoked once all threads have been JOLTed (or
 *                  immediately on the no-op paths). May have a null
 *                  callbackFn, in which case completion is silent.
 */
void PuppetApplication::joltAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // Idempotence guard: a second JOLT request is a no-op.
    if (threadsHaveBeenJolted)
    {
        std::cout << "Mrntt: All puppet threads already JOLTed. "
            << "Skipping JOLT request." << "\n";
        // BUGFIX: this path previously invoked callbackFn unconditionally,
        // unlike every other completion path which null-checks it first.
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    /* With no threads there is nothing to post: record completion and
     * notify immediately. BUGFIX: the flag is now set even when no callback
     * was supplied; previously a null callbackFn made this case fall
     * through and build a zero-count request that never completed, leaving
     * threadsHaveBeenJolted false forever.
     */
    if (componentThreads.empty())
    {
        threadsHaveBeenJolted = true;
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    // Fan out one JOLT per thread; the shared op counts completions and
    // fires `callback` once the last thread reports in.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);
    for (auto& thread : componentThreads)
    {
        thread->joltThreadReq(
            thread,
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::joltAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
/** Run the start-up sequence on every puppet thread.
 * @param callback  Invoked once all threads have started; callbackFn may be
 *                  null for fire-and-forget use.
 */
void PuppetApplication::startAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    /* With no threads, complete immediately. BUGFIX: previously this early
     * exit also required a non-null callbackFn, so an empty thread list
     * with a null callback fell through and built a zero-count request
     * that could never complete.
     */
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    // Shared op counts per-thread completions and fires `callback` last.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);
    for (auto& thread : componentThreads)
    {
        thread->startThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
/** Pause every puppet thread (each parks on its pause queue).
 * @param callback  Invoked once all threads have paused; callbackFn may be
 *                  null for fire-and-forget use.
 */
void PuppetApplication::pauseAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    /* With no threads, complete immediately. BUGFIX: previously this early
     * exit also required a non-null callbackFn, so an empty thread list
     * with a null callback fell through and built a zero-count request
     * that could never complete.
     */
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    // Shared op counts per-thread completions and fires `callback` last.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);
    for (auto& thread : componentThreads)
    {
        thread->pauseThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
/** Resume every paused puppet thread.
 * @param callback  Invoked once all threads have resumed; callbackFn may be
 *                  null for fire-and-forget use.
 */
void PuppetApplication::resumeAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    /* With no threads, complete immediately. BUGFIX: previously this early
     * exit also required a non-null callbackFn, so an empty thread list
     * with a null callback fell through and built a zero-count request
     * that could never complete.
     */
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    // Shared op counts per-thread completions and fires `callback` last.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);
    for (auto& thread : componentThreads)
    {
        thread->resumeThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::executeGenericOpOnAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
/** Ask every puppet thread to exit, then join their std::threads.
 * @param callback  Invoked once every thread has exited AND been joined;
 *                  callbackFn may be null for fire-and-forget use.
 */
void PuppetApplication::exitAllPuppetThreadsReq(
    Callback<puppetThreadLifetimeMgmtOpCbFn> callback
)
{
    // With no threads, complete immediately (null-safe callback handling,
    // consistent with the other fan-out requests).
    if (componentThreads.empty())
    {
        if (callback.callbackFn) {
            callback.callbackFn();
        }
        return;
    }
    // Shared op counts per-thread completions.
    auto request = std::make_shared<PuppetThreadLifetimeMgmtOp>(
        *this, componentThreads.size(), callback);
    for (auto& thread : componentThreads)
    {
        /* BUGFIX: bind the exit-specific completion handler so that
         * exitAllPuppetThreadsReq1() join()s every std::thread once the
         * last puppet reports in. The generic handler (previously bound
         * here, leaving exitAllPuppetThreadsReq1 unused) never joined
         * them; destroying a joinable std::thread calls std::terminate.
         */
        thread->exitThreadReq(
            {request, std::bind(
                &PuppetThreadLifetimeMgmtOp::exitAllPuppetThreadsReq1,
                request.get(), request)});
    }
}
/** Round-robin the puppet threads across the online CPUs, pinning each one.
 * getAvailableCpuCount() throws rather than returning <= 0, so the modulo
 * below is safe.
 */
void PuppetApplication::distributeAndPinThreadsAcrossCpus()
{
    const int cpuCount = ComponentThread::getAvailableCpuCount();
    int nPinned = 0;
    for (auto& thread : componentThreads)
    {
        thread->pinToCpu(nPinned % cpuCount);
        ++nPinned;
    }
    std::cout << __func__ << ": Distributed " << nPinned << " threads "
        << "across " << cpuCount << " CPUs\n";
}
} // namespace sscl

380
src/qutex.cpp Normal file
View File

@@ -0,0 +1,380 @@
#include <spinscale/qutex.h>
#include <spinscale/lockerAndInvokerBase.h>
namespace sscl {
/** Attempt to take ownership of this qutex on behalf of a queued lockvoker.
 *
 * @param tryingLockvoker  Lockvoker attempting the acquire; it must already
 *                         be registered in this qutex's queue (an empty
 *                         queue here is a programming error and throws).
 * @param nRequiredLocks   Total number of locks the lockvoker needs. Used
 *                         to relax the queue-position requirement: a
 *                         single-lock caller must be at the very front,
 *                         while an N-lock caller only needs to be in the
 *                         top (N-1)/N fraction of the queue (see the
 *                         EXPLANATION below).
 * @return true if ownership was taken; false if the caller must back off.
 * @throws std::runtime_error on an empty queue (invariant violation).
 */
bool Qutex::tryAcquire(
    const LockerAndInvokerBase &tryingLockvoker, int nRequiredLocks
)
{
    lock.acquire();
    const int qNItems = static_cast<int>(queue.size());
    // If queue is empty, this should never happen since we register before
    // trying to acquire.
    if (qNItems < 1)
    {
        lock.release();
        throw std::runtime_error(
            std::string(__func__) +
            ": tryAcquire called on empty queue - this should never happen");
    }
    // If lock is already owned, fail
    if (isOwned)
    {
        lock.release();
        return false;
    }
    /** EXPLANATION:
     * Calculate how many items from the rear we need to scan
     *
     * For nRequiredLocks=1: must be at front (nRearItemsToScan = qNItems, scan all)
     * For nRequiredLocks=2: must be in top 50% (nRearItemsToScan = qNItems/2)
     * For nRequiredLocks=3: must be in top 66% (nRearItemsToScan = qNItems/3)
     * etc.
     */
    const int nRearItemsToScan = qNItems / nRequiredLocks;
    // If we're the only item in queue, or if the fraction calculation
    // results in 0 rear items to scan, we automatically succeed
    if (qNItems == 1 || nRearItemsToScan < 1)
    {
        isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
        // Use the stored iterator from the LockSet
        auto it = tryingLockvoker.getLockvokerIteratorForQutex(*this);
        currOwner = *it;
#endif
        lock.release();
        return true;
    }
    // For single-lock requests, they must be at the front of the queue
    if (nRequiredLocks == 1)
    {
        bool ret = false;
        if ((*queue.front()) == tryingLockvoker)
        {
            isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
            currOwner = queue.front();
#endif
            ret = true;
        }
        else {
            ret = false;
        }
        lock.release();
        return ret;
    }
    // For multi-lock requests, check if the lockvoker is in the rear portion
    // If it's NOT in the rear portion, then it's in the top X% and should succeed
    auto rIt = queue.rbegin();
    auto rEndIt = queue.rend();
    bool foundInRear = false;
    for (int i = 0; i < nRearItemsToScan && rIt != rEndIt; ++rIt, ++i)
    {
        if ((**rIt) == tryingLockvoker)
        {
            foundInRear = true;
            break;
        }
    }
    if (foundInRear)
    {
        // Found in rear portion - not in top X%, so fail
        lock.release();
        return false;
    }
    // Not found in rear portion - must be in top X%, so succeed
    isOwned = true;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    // Use the stored iterator from the LockSet
    auto it = tryingLockvoker.getLockvokerIteratorForQutex(*this);
    currOwner = *it;
#endif
    lock.release();
    return true;
}
/** Back off of this qutex after a failed multi-lock acquisition attempt.
 *
 * Rotates the failed acquirer away from the front of the queue (to break
 * perfectly ordered NxN deadlocks -- see the long EXPLANATION below),
 * clears ownership, and wakes whoever is then at the front.
 *
 * @param failedAcquirer  Lockvoker that failed to acquire ALL its required
 *                        locks and is releasing its claim on this one.
 * @param nRequiredLocks  Size of the failed acquirer's lock set; determines
 *                        how far back in the queue it gets rotated.
 * @throws std::runtime_error on invariant violations (empty queue, or a
 *         single-lock acquirer failing while at the front).
 */
void Qutex::backoff(
    const LockerAndInvokerBase &failedAcquirer, int nRequiredLocks
)
{
    lock.acquire();
    const int nQItems = static_cast<int>(queue.size());
    if (nQItems < 1)
    {
        lock.release();
        throw std::runtime_error(
            std::string(__func__) +
            ": backoff called on empty queue - this should never happen");
    }
    /* Check if failedAcquirer is at the front of the queue with
     * nRequiredLocks == 1. This should never happen because an
     * acquirer at the front of the queue with nRequiredLocks == 1
     * should always succeed.
     */
    const LockerAndInvokerBase& oldFront = *queue.front();
    if (oldFront == failedAcquirer && nRequiredLocks == 1)
    {
        lock.release();
        throw std::runtime_error(
            std::string(__func__) +
            ": Failed acquirer is at front of queue with nRequiredLocks==1 - "
            "acquirer at front of queue with nRequiredLocks==1 should always "
            "succeed.");
    }
    // Rotate queue members if failedAcquirer is at front of queue
    if (oldFront == failedAcquirer && nQItems > 1)
    {
        /** EXPLANATION:
         * Rotate the top LockSet.size() items in the queue by moving
         * the failedAcquirer to the last position in the top
         * LockSet.size() items within the queue.
         *
         * I.e: if queue.size()==20, and lockSet.size()==5, then move
         * failedAcquirer from the front to the 5th position in the queue,
         * which should push the other 4 items forward.
         * If queue.size()==3 and LockSet.size()==5, then just
         * push_back(failedAcquirer).
         *
         * It is impossible for a Qutex queue to have only one
         * item in it, yet for that Lockvoker item to have failed to
         * acquire the Qutex. Being the only item in the ticketQ
         * means that you must succeed at acquiring the Qutex.
         */
        int indexOfItemToInsertCurrFrontBefore;
        if (nQItems > nRequiredLocks) {
            indexOfItemToInsertCurrFrontBefore = nRequiredLocks;
        } else
        {
            // -1 means insert at back -- i.e, use list::end() as insertPos.
            indexOfItemToInsertCurrFrontBefore = -1;
        }
        /* EXPLANATION:
         * Rotate them here.
         *
         * The reason why we do this rotation is to avoid a particular kind
         * of deadlock wherein a grid of async requests is perfectly
         * configured so as to guarantee that none of them can make any
         * forward progress unless they get reordered.
         *
         * Consider 2 different locks with 2 different items in them
         * each, both of which come from 2 particular requests:
         * Qutex1: Lockvoker1, Lv2
         * Qutex2: Lv2, Lv1
         *
         * Moreover, both of these lockvokers have requiredLocks.size()==2,
         * and the particular 2 locks that each one requires are indeed
         * Qutex1 and Qutex2.
         *
         * This particular setup basically means that in TL1's queue, Lv1
         * will wakeup since it's at the front of TL1. It'll successfully
         * acquire TL1 (since it's at the front), and then it'll try to
         * acquire TL2. But since Lv1 isn't in the top 50% of items in TL2's
         * queue, Lv1 will fail to acquire TL2.
         *
         * Then similarly, in TL2's queue, Lv2 will wakeup since it's at
         * the front. Again, it'll successfully acquire TL2 since it's at
         * the front of TL2's queue. But then it'll try to acquire TL1.
         * Since it's not in the top 50% of TL1's enqueued items, it'll fail
         * to acquire TL1.
         *
         * N.B: This type of perfectly ordered deadlock can occur in any
         * kind of NxN situation where ticketQ.size()==requiredLocks.size().
         * That could be 4x4, 5x5, 6x6, etc. It doesn't happen in 1x1
         * because a Lockvoker that only requires one lock will always just
         * succeed if it's at the front of its queue.
         *
         * This state of affairs is stable and will persist unless these
         * queues are reordered in some way. Hence: that's why we rotate the
         * items in a QutexQ after backing off of it. Backing off means
         * not necessarily that the calling Lockvoker failed to acquire
         * THIS PARTICULAR Qutex, but rather that it failed to acquire
         * ALL of its required locks.
         *
         * Hence, if we are backing out, we should also rotate the items
         * in the queue if the current front item is the failed acquirer.
         * So that's why we do this rotation here.
         */
        // Find the iterator for the failed acquirer (which is at the front)
        auto frontIt = queue.begin();
        // Find the position to insert before using indexOfItemToInsertCurrFrontBefore
        auto insertPos = queue.begin();
        if (indexOfItemToInsertCurrFrontBefore == -1)
        {
            // -1 means insert at the back (before end())
            insertPos = queue.end();
        }
        else
        {
            // Move to the specified position (0-based index)
            for (
                int i = 0;
                i < indexOfItemToInsertCurrFrontBefore
                && insertPos != queue.end(); ++i)
            {
                ++insertPos;
            }
        }
        /** NOTE:
         * According to https://en.cppreference.com/w/cpp/container/list/splice:
         * "No iterators or references become invalidated. If *this and other
         * refer to different objects, the iterators to the transferred elements
         * now refer into *this, not into other."
         *
         * So our stored iterator inside of LockSet will still be valid after
         * the splice, and we can use it to unregister the lockvoker later on.
         */
        queue.splice(insertPos, queue, frontIt);
    }
    isOwned = false;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    currOwner = nullptr;
#endif
    LockerAndInvokerBase &newFront = *queue.front();
    lock.release();
    /** EXPLANATION:
     * Why should this never happen? Well, if we were at the front of the queue
     * and we failed to acquire the lock, we should have been rotated away from
     * the front. On the other hand, if we were not at the front of the queue
     * and we failed to acquire the lock, then we weren't at the front of the
     * queue to begin with.
     * The exception is if the queue has only one item in it.
     *
     * Hence there ought to be no way for the failedAcquirer to be at the front
     * of the queue at this point UNLESS the queue has only one item in it.
     */
    if (newFront == failedAcquirer && nQItems > 1)
    {
        throw std::runtime_error(
            std::string(__func__) +
            ": Failed acquirer is at the front of the queue at the end of "
            "backoff, yet nQItems > 1 - this should never happen");
    }
    /** EXPLANATION:
     * We should always awaken whoever is at the front of the queue, even if
     * we didn't rotate. Why? Consider this scenario:
     *
     * Lv1 has LockSet.size==1. Lv2 has LockSet.size==3.
     * Lv1's required lock overlaps with Lv2's set of 3 required locks.
     * Lv1 registers itself in its 1 qutex's queue.
     * Lv2 registers itself in all 3 of its qutexes' queues.
     * Lv2 acquires the lock that it needs in common with Lv1.
     * (Assume that Lv2 was not at the front of the common qutex's
     * internal queue -- it only needed to be in the top 66%.)
     * Lv1 tries to acquire the common lock and fails. It gets taken off of
     * its io_service. It's now asleep until it gets
     * re-added into an io_service.
     * Lv2 fails to acquire the other 2 locks it needs and backoff()s from
     * the common lock it shares with Lv1.
     *
     * If Lv2 does NOT awaken the item at the front of the common lock's
     * queue (aka: Lv1), then Lv1 is doomed to never wake up again.
     *
     * Hence: backout() callers should always wake up the lockvoker at the
     * front of their queue before leaving.
     *
     * The exception is if the item at the front is the backout() caller
     * itself. This can happen if, for example a multi-locking lockvoker
     * is backing off of a qutex within which it's the only waiter.
     */
    if (nQItems > 1) {
        newFront.awaken();
    }
}
/** Release an owned qutex and wake the lockvoker at the front of its queue.
 *
 * @throws std::runtime_error if the qutex is not currently owned (calling
 *         release() on an unowned qutex is a programming error -- see the
 *         EXPLANATION below).
 */
void Qutex::release()
{
    lock.acquire();
    /** EXPLANATION:
     * A qutex must not have its release() called when it's not owned. The
     * plumbing required to permit that is a bit excessive, and we have
     * instrumentation to track early qutex release()ing in
     * SerializedAsynchronousContinuation.
     */
    if (!isOwned
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
        || currOwner == nullptr
#endif
    )
    {
        throw std::runtime_error(
            std::string(__func__) +
            ": release() called on unowned qutex - this should never happen");
    }
    isOwned = false;
#ifdef CONFIG_ENABLE_DEBUG_LOCKS
    currOwner = nullptr;
#endif
    // It's possible for there to be 0 items left in queue after unregistering.
    if (queue.empty())
    {
        lock.release();
        return;
    }
    /** EXPLANATION:
     * It would be nice to be able to optimize by only awakening if the
     * release()ing lockvoker was at the front of the qutexQ, but if we
     * don't unconditionally wakeup() the front item, we could get lost
     * wakeups. Consider:
     *
     * Lv1 only has 1 requiredLock.
     * Lv2 has 3 requiredLocks. One of its requiredLocks overlaps with
     * Lv1's single requiredLock. So they both share a common lock.
     * Lv3 currently owns Lv1 & Lv2's common requiredLock.
     * Lv3 release()s that common lock.
     * Lv1 happens to be next in queue after Lv3 unregisters itself.
     * Lv3 wakes up Lv1.
     * Just before Lv1 can acquire the common lock, Lv2 acquires it now,
     * because it only needs to be in the top 66% to succeed.
     * Lv1 checks the currOwner and sees that it's owned. Lv1 is now
     * dequeued from its io_service. It won't be awakened until someone
     * awakens it.
     * Lv2 finishes its critical section and release()s the common lock.
     * Lv2 was not at the front of the qutexQ, so it does NOT awaken the
     * current item at the front.
     *
     * Thus, Lv1 never gets awakened again. The end.
     * This also means that no LockSet.size()==1 lockvoker will ever be able
     * to run again since they can only run if they are at the front of the
     * qutexQ.
     *
     * Therefore we must always awaken the front item when release()ing.
     */
    LockerAndInvokerBase &front = *queue.front();
    lock.release();
    front.awaken();
}
} // namespace sscl

View File

@@ -0,0 +1,393 @@
#include <spinscale/qutexAcquisitionHistoryTracker.h>
#include <spinscale/serializedAsynchronousContinuation.h>
#include <spinscale/qutex.h>
#include <spinscale/dependencyGraph.h>
#include <memory>
#include <forward_list>
#include <functional>
#include <iostream>
#include <algorithm>
namespace sscl {
/// Ensure `node` exists in the graph (with an empty out-edge set if new).
void DependencyGraph::addNode(const Node& node)
{
    // operator[] default-constructs the edge set on first touch; the
    // result is deliberately discarded.
    static_cast<void>(adjacencyList[node]);
}
/// Record a directed dependency edge: source -> target.
void DependencyGraph::addEdge(const Node& source, const Node& target)
{
    // Make sure the target exists even if it never gains out-edges;
    // operator[] below creates the source's entry implicitly.
    addNode(target);
    adjacencyList[source].insert(target);
}
/// Collect every cycle reachable in the graph via DFS from each unvisited
/// root. Each returned cycle lists its nodes in path order, with the start
/// node repeated at the end.
std::vector<std::vector<DependencyGraph::Node>>
DependencyGraph::findCycles() const
{
    std::unordered_set<Node> seen;
    std::unordered_set<Node> onStack;
    std::vector<std::vector<Node>> foundCycles;
    std::vector<Node> currentPath;
    for (const auto& kv : adjacencyList)
    {
        if (seen.count(kv.first) == 0)
        {
            dfsCycleDetection(
                kv.first, seen, onStack, currentPath, foundCycles);
        }
    }
    return foundCycles;
}
bool DependencyGraph::hasCycles() const
{
std::unordered_set<Node> visited;
std::unordered_set<Node> recursionStack;
std::vector<std::vector<Node>> cycles;
std::vector<Node> path;
for (const auto& entry : adjacencyList)
{
const Node& node = entry.first;
if (visited.find(node) == visited.end())
{
dfsCycleDetection(node, visited, recursionStack, path, cycles);
if (!cycles.empty())
{ return true; }
}
}
return false;
}
/// Number of nodes currently in the graph.
size_t DependencyGraph::getNodeCount() const
{
    return adjacencyList.size();
}
/** Recursive DFS step for cycle detection.
 *
 * @param visited        All nodes ever entered by the DFS (never removed).
 * @param recursionStack Nodes on the CURRENT DFS path; a back-edge into
 *                       this set signals a cycle.
 * @param path           The current DFS path in order, used to reconstruct
 *                       the cycle's node sequence.
 * @param cycles         Output: each detected cycle as a node list, with
 *                       the entry node repeated at the end.
 *
 * NOTE(review): recursion depth is bounded by the longest simple path in
 * the graph -- presumably acceptable for the expected lockvoker counts.
 */
void DependencyGraph::dfsCycleDetection(
    const Node& node,
    std::unordered_set<Node>& visited,
    std::unordered_set<Node>& recursionStack,
    std::vector<Node>& path,
    std::vector<std::vector<Node>>& cycles
)
const
{
    // Mark current node as visited and add to recursion stack
    visited.insert(node);
    recursionStack.insert(node);
    path.push_back(node);
    // Check all adjacent nodes
    auto it = adjacencyList.find(node);
    if (it != adjacencyList.end())
    {
        for (const auto& adjacent : it->second)
        {
            // If adjacent node is in recursion stack, we found a cycle
            if (recursionStack.find(adjacent) != recursionStack.end())
            {
                // Find the start of the cycle in the current path
                auto cycleStart = std::find(path.begin(), path.end(), adjacent);
                if (cycleStart != path.end())
                {
                    std::vector<Node> cycle(cycleStart, path.end());
                    cycle.push_back(adjacent); // Complete the cycle
                    cycles.push_back(cycle);
                }
            }
            // If adjacent node hasn't been visited, recurse
            else if (visited.find(adjacent) == visited.end())
            {
                dfsCycleDetection(
                    adjacent, visited, recursionStack, path, cycles);
            }
        }
    }
    // Remove from recursion stack and path when backtracking
    recursionStack.erase(node);
    path.pop_back();
}
// QutexAcquisitionHistoryTracker implementation
/** Build a waits-for DependencyGraph from the global acquisition history.
 *
 * An edge A -> B is added when continuation A's wanted lock is currently
 * held (per the recorded history) by continuation B. Cycles in the
 * resulting graph indicate suspected gridlocks.
 *
 * @param dontAcquireLock  Pass true only when the caller already holds
 *                         acquisitionHistoryLock (avoids self-deadlock on
 *                         a non-recursive lock).
 * @return Newly built graph; caller owns it.
 *
 * Complexity is O(E^2 * L) in history entries E and held locks L --
 * acceptable for a debug/diagnostic path.
 */
std::unique_ptr<DependencyGraph>
QutexAcquisitionHistoryTracker::generateGraph(bool dontAcquireLock)
{
    auto graph = std::make_unique<DependencyGraph>();
    if (!dontAcquireLock) {
        acquisitionHistoryLock.acquire();
    }
    // First pass: Add all continuations as nodes
    for (const auto& entry : acquisitionHistory)
    {
        const auto& continuation = entry.first;
        graph->addNode(continuation);
    }
    // Second pass: Add edges based on lock dependencies
    for (const auto& entry : acquisitionHistory)
    {
        const auto& continuation = entry.first;
        const auto& historyEntry = entry.second;
        const auto& wantedLock = historyEntry.first;
        const auto& heldLocks = historyEntry.second;
        if (!heldLocks) { continue; }
        // Check if any other continuation holds the lock this continuation wants
        for (const auto& otherEntry : acquisitionHistory)
        {
            const auto& otherContinuation = otherEntry.first;
            const auto& otherHistoryEntry = otherEntry.second;
            const auto& otherHeldLocks = otherHistoryEntry.second;
            // Skip self-comparison
            if (continuation == otherContinuation) { continue; }
            if (!otherHeldLocks) { continue; }
            // Check if other continuation holds the wanted lock
            // (identity comparison by address of the referenced Qutex).
            for (const auto& otherHeldLock : *otherHeldLocks)
            {
                if (&otherHeldLock.get() == &wantedLock.get())
                {
                    // Add edge: continuation -> otherContinuation
                    // (continuation wants a lock held by otherContinuation)
                    graph->addEdge(continuation, otherContinuation);
                    break;
                }
            }
        }
    }
    if (!dontAcquireLock) {
        acquisitionHistoryLock.release();
    }
    return graph;
}
/** EXPLANATION - GRIDLOCK DETECTION ALGORITHM:
* This file implements gridlock detection algorithms that use a central
* acquisition history to track all lockvokers suspected of being gridlocked.
*
* ALGORITHM OVERVIEW:
* 1. When a lockvoker finds that DEADLOCK_TIMEOUT_MS has elapsed and it
* still can't acquire a particular lock (firstFailedQutex), it creates
* a new entry in a global acquisition history.
*
* 2. The acquisition history is an unordered_map with:
* - Key: std::shared_ptr<AsynchronousContinuationChainLink>
* (the timed-out lockvoker's continuation)
* - Value: std::pair<
* std::reference_wrapper<Qutex>,
* std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>>
* * pair.first: The firstFailedQutex that this lockvoker WANTS but
* can't acquire. This metadata is essential for later-arriving
* entrants to analyze what their predecessor timed-out sequences
* want.
* * pair.second: A unique_ptr to a list of all acquired Qutexes in this
* lockvoker's continuation history.
*
* 3. Each timed-out lockvoker:
* a) Adds itself to the acquisition history map with its wanted lock and
* acquired locks
* b) Iterates through all OTHER entries in the map (excluding itself)
* c) For each other entry, checks if that entry's acquired locks
* (pair.second) contains the lock that this lockvoker wants
* (aka: firstFailedQutex/pair.first)
* d) If found, we have detected a gridlock: two sequences where at least
* one wants a lock held by the other, and the other wants a lock that
* it can't acquire.
*
* GRIDLOCK CONDITION:
* A gridlock exists when we find a circular chain of dependencies:
* - Lockvoker A wants LockX but can't acquire it (held by Lockvoker B)
* - Lockvoker B wants LockY but can't acquire it (held by Lockvoker C, D, etc.)
* - The chain must be circular (eventually leading back to Lockvoker A or another
* lockvoker in the chain) to ensure it's a true gridlock, not just a delay
*
* TIMED DELAY, I/O DELAY, or LONG-RUNNING OPERATION FALSE-POSITIVE:
* Without circularity detection, we could incorrectly flag a simple delay, I/O
* delay, or long-running operation as a gridlock. For example: Lockvoker A
* wants LockX (held by Lockvoker B), and Lockvoker B is currently in a 10-second
* sleep/delay. When B wakes up, it will release LockX, allowing A to proceed.
* This is not a gridlock - it's just A waiting longer than DEADLOCK_TIMEOUT_MS
* for B to finish its work. True gridlocks require circular dependencies where
* no sequence can make progress because they're all waiting for each other in
* a cycle.
*
* The central history metadata enables us to detect complex gridlocks involving
* multiple lockvokers (2, 3, 4, 5+ sequences) by building up the acquisition
* history over time as different lockvokers timeout and add their information.
*/
/**
 * @brief Heuristic check for a gridlock involving `currentContinuation`.
 *
 * Scans the global acquisition history for any OTHER continuation that
 * currently holds `firstFailedQutex` (the lock this continuation wants but
 * could not acquire within the timeout). Finding one is treated as a likely
 * gridlock.
 *
 * @param firstFailedQutex    The qutex this continuation failed to acquire.
 * @param currentContinuation The timed-out continuation doing the check.
 * @return true if a likely gridlock was detected (and reported to stderr),
 *         false otherwise.
 */
bool QutexAcquisitionHistoryTracker
::heuristicallyTraceContinuationHistoryForGridlockOn(
    Qutex &firstFailedQutex,
    std::shared_ptr<AsynchronousContinuationChainLink>& currentContinuation)
{
    /** HEURISTIC APPROACH:
     * Due to the computational complexity of full circularity detection,
     * we implement a heuristically adequate check: when we find 2 sequences
     * where one depends on the other, and the other has reached timeout,
     * we assume this is a likely gridlock. This approach is not
     * algorithmically complete (it may miss some complex circular
     * dependencies or flag false positives), but it is heuristically useful
     * for debugging and identifying potential concurrency issues in
     * practice.
     *
     * See the file-local comment above for the complete algorithm
     * explanation.
     */
    /** NOTICE:
     * Generally we should have all global data structures owned by a single
     * ComponentThread; and qutexes really should only be used to serialize
     * async sequences being enqueued on the same ComponentThread. But this
     * doesn't prevent multiple CPUs from trying to add/remove entries to/from
     * the acquisition history at the same time. Why? The acquisition history
     * isn't per-CPU, it's global.
     *
     * The problem with using a SpinLock here is that if the STL uses mutexes
     * internally to lock containers, we could end up in a situation where
     * spinning waiters will be busy-spinning while the owner is sleeping.
     *
     * But this should not happen since the nature of the order of operations
     * is that the spinlock ensures that only one CPU at a time can be
     * adding/removing entries; and thus every time a method is called on the
     * unordered_map, the caller will always succeed at acquiring the
     * underlying STL mutex.
     *
     * So it should be safe to use a SpinLock here.
     */
    acquisitionHistoryLock.acquire();
    // Iterate through all entries in the acquisition history
    for (const auto& entry : acquisitionHistory)
    {
        const auto& continuation = entry.first;
        const auto& historyEntry = entry.second;
        // Skip the current continuation (don't compare with itself)
        if (continuation == currentContinuation) {
            continue;
        }
        // Check if firstFailedQutex is in this continuation's held locks
        const std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>&
            heldLocks = historyEntry.second;
        if (!heldLocks)
        { continue; }
        for (const auto& heldLock : *heldLocks)
        {
            /* Found firstFailedQutex in another continuation's held locks.
             * This indicates a potential gridlock.
             */
            if (&heldLock.get() != &firstFailedQutex)
            { continue; }
            /* BUGFIX: capture the other continuation's address while the
             * lock is still held. `continuation` is a reference into
             * acquisitionHistory; once the lock is released another CPU may
             * erase that entry, leaving the reference dangling. Dereferencing
             * it after release() was a use-after-unlock race.
             * (firstFailedQutex and currentContinuation are caller-owned and
             * remain valid after release.)
             */
            const void *otherContinuation =
                static_cast<const void *>(continuation.get());
            acquisitionHistoryLock.release();
            std::cerr << __func__ << ": GRIDLOCK DETECTED: Current "
                "continuation @" << currentContinuation.get()
                << " wants lock '" << firstFailedQutex.name
                << "' which is held by continuation @"
                << otherContinuation << std::endl;
            return true;
        }
    }
    acquisitionHistoryLock.release();
    return false;
}
/**
 * @brief Algorithmically complete gridlock detection via cycle search.
 *
 * Builds a dependency graph from the acquisition history (generateGraph)
 * and reports every cycle found to stderr.
 *
 * @param firstFailedQutex Unused; kept for interface symmetry with the
 *                         heuristic variant.
 * @return true if at least one circular dependency (gridlock) was found,
 *         false otherwise.
 */
bool QutexAcquisitionHistoryTracker
::completelyTraceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
{
    (void)firstFailedQutex;
    /** ALGORITHMICALLY COMPLETE VERSION:
     * This function implements the algorithmically complete version of gridlock
     * detection that performs full circularity detection. It builds a dependency
     * graph from the acquisition history and uses DFS with cycle detection to
     * identify true circular dependencies.
     *
     * See the file-local comment above for the complete algorithm explanation.
     */
    acquisitionHistoryLock.acquire();
    // Helper function to print continuation dependency info
    auto printContinuationDependency = [&](
        const auto& fromContinuation, const auto& toContinuation
    )
    {
        auto it = acquisitionHistory.find(fromContinuation);
        if (it != acquisitionHistory.end())
        {
            const auto& wantedLock = it->second.first;
            std::cerr << " Continuation @" << fromContinuation.get()
                << " wants lock[\"" << wantedLock.get().name << "\"], "
                << "held by continuation @" << toContinuation.get()
                << std::endl;
        }
        else
        {
            std::cerr << " Continuation @" << fromContinuation.get()
                << " -> continuation @" << toContinuation.get()
                << std::endl;
        }
    };
    // Pass true to dontAcquireLock since we already hold it
    auto graph = generateGraph(true);
    // Early return if no graph or no cycles
    if (!graph || !graph->hasCycles())
    {
        acquisitionHistoryLock.release();
        return false;
    }
    auto cycles = graph->findCycles();
    std::cerr << __func__ << ": CIRCULAR DEPENDENCIES DETECTED: Found "
        << cycles.size() << " cycle(s) in lock dependency graph:" << std::endl;
    for (size_t i = 0; i < cycles.size(); ++i)
    {
        const auto& cycle = cycles[i];
        std::cerr << " Cycle " << (i + 1) << ":\n";
        /* BUGFIX: the empty-cycle guard must come BEFORE the edge loop.
         * Previously the loop bound was `j < cycle.size() - 1`, which for an
         * empty cycle underflows (size_t) to SIZE_MAX and indexes out of
         * bounds. The `j + 1 < cycle.size()` form is underflow-proof, and
         * the explicit guard also protects the closing-edge access below.
         */
        if (cycle.empty())
        { continue; }
        for (size_t j = 0; j + 1 < cycle.size(); ++j)
        {
            const auto& currentContinuation = cycle[j];
            const auto& nextContinuation = cycle[j + 1];
            printContinuationDependency(currentContinuation, nextContinuation);
        }
        /* Handle the last edge (back to start of cycle)
         */
        const auto& lastContinuation = cycle[cycle.size() - 1];
        const auto& firstContinuation = cycle[0];
        printContinuationDependency(lastContinuation, firstContinuation);
    }
    acquisitionHistoryLock.release();
    return true;
}
} // namespace sscl