Add AGENTS.md

Salmanoff: Version increment to v0.01.001
Docs: Document ambience stimbuff and high-val param
2026-04-01 21:55:53 -04:00 · 2025-11-23 07:35:21 -04:00 · 2025-11-23 07:34:59 -04:00 · 2025-11-23 07:28:04 -04:00 · 2025-11-23 07:25:53 -04:00 · 2025-11-23 07:20:55 -04:00
189 changed files with 23094 additions and 2075 deletions
@@ -12,3 +12,4 @@ config.h.in
 configure
 *.swp
 cscope.out
+*.tmp
@@ -0,0 +1,3 @@
+[submodule "third_party/googletest"]
+	path = third_party/googletest
+	url = https://github.com/google/googletest.git
@@ -3,12 +3,11 @@
        {
            "name": "Linux",
            "includePath": [
-                "${workspaceFolder}/**",
                "${workspaceFolder}/include",
                "${workspaceFolder}/smocore/include",
+                "${workspaceFolder}/b/include",
                "/usr/include",
-                "/usr/local/include",
-                "${workspaceFolder}/b/include"
+                "/usr/local/include"
            ],
            "defines": [],
            "compilerPath": "/usr/bin/g++",
@@ -24,7 +23,8 @@
            },
            "forcedInclude": [
                "${workspaceFolder}/b/include/config.h"
-            ]
+            ],
+            "configurationProvider": "ms-vscode.cmake-tools"
        }
    ],
    "version": 4
@@ -1,83 +1,112 @@
 {
    "files.associations": {
-        "cstdint": "cpp",
-        "array": "cpp",
-        "atomic": "cpp",
-        "bit": "cpp",
-        "*.tcc": "cpp",
-        "cctype": "cpp",
-        "charconv": "cpp",
-        "chrono": "cpp",
-        "clocale": "cpp",
-        "cmath": "cpp",
-        "compare": "cpp",
-        "concepts": "cpp",
-        "condition_variable": "cpp",
-        "cstdarg": "cpp",
-        "cstddef": "cpp",
-        "cstdio": "cpp",
-        "cstdlib": "cpp",
-        "ctime": "cpp",
-        "cwchar": "cpp",
-        "cwctype": "cpp",
-        "deque": "cpp",
-        "map": "cpp",
-        "set": "cpp",
-        "string": "cpp",
-        "unordered_map": "cpp",
-        "vector": "cpp",
-        "exception": "cpp",
-        "algorithm": "cpp",
-        "functional": "cpp",
-        "iterator": "cpp",
-        "memory": "cpp",
-        "memory_resource": "cpp",
-        "numeric": "cpp",
-        "optional": "cpp",
-        "random": "cpp",
-        "ratio": "cpp",
-        "string_view": "cpp",
-        "system_error": "cpp",
-        "tuple": "cpp",
-        "type_traits": "cpp",
-        "utility": "cpp",
-        "format": "cpp",
-        "fstream": "cpp",
-        "initializer_list": "cpp",
-        "iomanip": "cpp",
-        "iosfwd": "cpp",
-        "iostream": "cpp",
-        "istream": "cpp",
-        "limits": "cpp",
-        "mutex": "cpp",
-        "new": "cpp",
-        "numbers": "cpp",
-        "ostream": "cpp",
-        "semaphore": "cpp",
-        "span": "cpp",
-        "sstream": "cpp",
-        "stdexcept": "cpp",
-        "stop_token": "cpp",
-        "streambuf": "cpp",
-        "thread": "cpp",
-        "typeinfo": "cpp",
-        "variant": "cpp",
-        "cstring": "cpp",
-        "cinttypes": "cpp",
-        "any": "cpp",
-        "codecvt": "cpp",
-        "complex": "cpp",
-        "coroutine": "cpp",
-        "csignal": "cpp",
-        "list": "cpp",
-        "source_location": "cpp",
-        "future": "cpp",
-        "shared_mutex": "cpp",
-        "typeindex": "cpp",
-        "bitset": "cpp"
-    },
+		"*.cl": "c",
+		"cstdint": "cpp",
+		"array": "cpp",
+		"atomic": "cpp",
+		"bit": "cpp",
+		"*.tcc": "cpp",
+		"cctype": "cpp",
+		"charconv": "cpp",
+		"chrono": "cpp",
+		"clocale": "cpp",
+		"cmath": "cpp",
+		"compare": "cpp",
+		"concepts": "cpp",
+		"condition_variable": "cpp",
+		"cstdarg": "cpp",
+		"cstddef": "cpp",
+		"cstdio": "cpp",
+		"cstdlib": "cpp",
+		"ctime": "cpp",
+		"cwchar": "cpp",
+		"cwctype": "cpp",
+		"deque": "cpp",
+		"map": "cpp",
+		"set": "cpp",
+		"string": "cpp",
+		"unordered_map": "cpp",
+		"vector": "cpp",
+		"exception": "cpp",
+		"algorithm": "cpp",
+		"functional": "cpp",
+		"iterator": "cpp",
+		"memory": "cpp",
+		"memory_resource": "cpp",
+		"numeric": "cpp",
+		"optional": "cpp",
+		"random": "cpp",
+		"ratio": "cpp",
+		"string_view": "cpp",
+		"system_error": "cpp",
+		"tuple": "cpp",
+		"type_traits": "cpp",
+		"utility": "cpp",
+		"format": "cpp",
+		"fstream": "cpp",
+		"initializer_list": "cpp",
+		"iomanip": "cpp",
+		"iosfwd": "cpp",
+		"iostream": "cpp",
+		"istream": "cpp",
+		"limits": "cpp",
+		"mutex": "cpp",
+		"new": "cpp",
+		"numbers": "cpp",
+		"ostream": "cpp",
+		"semaphore": "cpp",
+		"span": "cpp",
+		"sstream": "cpp",
+		"stdexcept": "cpp",
+		"stop_token": "cpp",
+		"streambuf": "cpp",
+		"thread": "cpp",
+		"typeinfo": "cpp",
+		"variant": "cpp",
+		"cstring": "cpp",
+		"cinttypes": "cpp",
+		"any": "cpp",
+		"codecvt": "cpp",
+		"complex": "cpp",
+		"coroutine": "cpp",
+		"csignal": "cpp",
+		"list": "cpp",
+		"source_location": "cpp",
+		"future": "cpp",
+		"shared_mutex": "cpp",
+		"typeindex": "cpp",
+		"bitset": "cpp",
+		"*.ipp": "cpp",
+		"unordered_set": "cpp",
+		"forward_list": "cpp",
+		"barrier": "cpp",
+		"strstream": "cpp",
+		"regex": "cpp",
+		"stacktrace": "cpp",
+		"stdfloat": "cpp",
+		"cfenv": "cpp",
+		"expected": "cpp",
+		"valarray": "cpp",
+		"core": "cpp",
+		"nonlinearoptimization": "cpp",
+		"*.txx": "cpp"
+	},
    "editor.rulers": [80, 120],
    "editor.tabSize": 4,
    "editor.insertSpaces": false,
-    "editor.detectIndentation": false
+    "editor.detectIndentation": false,
+    "editor.inlayHints.enabled": "off",
+    "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
+    "C_Cpp.default.browse.limitSymbolsToIncludedHeaders": true,
+    "C_Cpp.default.browse.path": [
+        "${workspaceFolder}",
+        "${workspaceFolder}/b"
+    ],
+    "C_Cpp.default.includePath": [
+        "${workspaceFolder}/include",
+        "${workspaceFolder}/smocore/include",
+        "${workspaceFolder}/b/include",
+        "/usr/include",
+        "/usr/local/include"
+    ]
 }
@@ -0,0 +1,10 @@
+# Project Instructions
+
+- Always break functions into logical subfunctions. No long-scrolling functions, in any language. This applies to source code, scripts, build scripts, CMake, Makefiles, and similar project files. Preserve this subfunction splitting discipline during refactors.
+- Modularity is non-negotiable. Always group logically related functions together into a module. Preserve modularity during refactors.
+- Reuse or extend existing abstractions instead of duplicating logic wherever possible. Don't repeat yourself. The goal here is to prevent duplication, not to discourage appropriate logical separation of prior abstractions into new logical abstractions where sensible.
+- Always isolate configurable behaviour into configuration variables appropriate for the language and framework being used.
+- Never bake in literals; at minimum, declare them at the top of the file with a semantically meaningful name.
+- UI should be responsive. Always prefer to use pre-packaged UI toolkit widgets, containers and colour sets harmoniously, instead of writing custom CSS overrides. Write custom CSS only if there's no UI toolkit mechanism available.
+- Aggressively isolate, split off, deduplicate and reuse code which can be made into common library code. Do the same with UI elements. Do this both when implementing new features and opportunistically while refactoring or changing old code/UI elements.
+- Names of files, functions, classes, abstractions, database fields, etc. should be aimed at disambiguating purpose and function, rather than at brevity.
@@ -1,15 +1,19 @@
 cmake_minimum_required(VERSION 3.16)
-project(salmanoff VERSION 0.00.004 LANGUAGES CXX)
+project(salmanoff VERSION 0.01.001 LANGUAGES CXX)

 include(CMakeDependentOption)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/DAPSS.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/DebugOpts.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/VerifyBoostDynamic.cmake)

 # Set C++ standard
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

 # Build type
 if(NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Debug FORCE)
+	# Default to a Debug build when the user did not choose a build type.
+	# FORCE is only meaningful together with CACHE: without CACHE, set() would
+	# treat FORCE as a second value and store the list "Debug;FORCE", which
+	# matches no configuration and silently drops the per-config flags.
+	set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
 endif()

 # Compiler flags
@@ -22,10 +26,67 @@ if(NOT MIND_VOSCILLATOR_PERIOD_MS GREATER 0)
 endif()
 math(EXPR MIND_VOSCILLATOR_FREQ_MS "1000 / ${MIND_VOSCILLATOR_PERIOD_MS}")

+# Device manager reattacher configuration
+set(MRNTT_DEVMGR_REATTACHER_PERIOD_MS 2000
+	CACHE STRING "Device manager reattacher period (ms)")
+if(NOT MRNTT_DEVMGR_REATTACHER_PERIOD_MS GREATER 0)
+    message(FATAL_ERROR
+		"MRNTT_DEVMGR_REATTACHER_PERIOD_MS must be a positive integer > 0")
+endif()
+
+# Stimulus buffer frame period configuration
+set(STIMBUFF_FRAME_PERIOD_MS 33
+	CACHE STRING "Stimulus buffer frame period (ms)")
+if(NOT STIMBUFF_FRAME_PERIOD_MS GREATER 0)
+    message(FATAL_ERROR
+		"STIMBUFF_FRAME_PERIOD_MS must be a positive integer > 0")
+endif()
+
+# Stimulus buffer frame retry delay configuration
+set(STIMBUFF_FRAME_RETRY_DELAY_MS 1
+	CACHE STRING "Stimulus buffer frame retry delay (ms)")
+if(NOT STIMBUFF_FRAME_RETRY_DELAY_MS GREATER 0)
+    message(FATAL_ERROR
+		"STIMBUFF_FRAME_RETRY_DELAY_MS must be a positive integer > 0")
+endif()
+
+# World thread configuration
+option(WORLD_USE_BODY_THREAD
+	"Use body thread for world component instead of separate world thread" OFF)
+
+# Test configuration
+option(ENABLE_TESTS "Enable building tests" OFF)
+
+# Set the debug locks variable for config.h
+if(ENABLE_DEBUG_LOCKS)
+    set(CONFIG_ENABLE_DEBUG_LOCKS TRUE)
+endif()
+
+# Set the debug trace callables variable for config.h
+if(ENABLE_DEBUG_TRACE_CALLABLES)
+    set(CONFIG_DEBUG_TRACE_CALLABLES TRUE)
+    # Suppress frame-address warnings when using __builtin_return_address()
+	# with values above 0 (See callableTracer.h).
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-frame-address")
+endif()
+
+# Set the world thread variable for config.h
+if(WORLD_USE_BODY_THREAD)
+    set(CONFIG_WORLD_USE_BODY_THREAD TRUE)
+endif()
+
+# Set the timeout variable for config.h
+set(CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS ${DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS})
+# Set the stimulus buffer frame period variable for config.h
+set(CONFIG_STIMBUFF_FRAME_PERIOD_MS ${STIMBUFF_FRAME_PERIOD_MS})
+# Set the stimulus buffer frame retry delay variable for config.h
+set(CONFIG_STIMBUFF_FRAME_RETRY_DELAY_MS ${STIMBUFF_FRAME_RETRY_DELAY_MS})
+
 # Configure config.h
 configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/include/config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/include/config.h
+    @ONLY
 )

 # Include directories
@@ -36,32 +97,130 @@ include_directories(
 )

 # Find core dependencies
-find_package(Boost 1.69.0 REQUIRED COMPONENTS system)
+# We cannot use header-only Boost.Asio because we need both our dlopen()'d
+# libraries and the main binary to refer to the same instances of boost::asio's
+# metadata. If we use header-only Boost.Asio, each dlopen()'d library will have
+# its own copy of boost::asio's metadata, which will cause a segfault if
+# boost::asio objects are used inside of a dlopen()'d library.
+#
+# Honestly, I never liked this whole "header-only" idea so I'm happy to be rid
+# of it.
+#
+# Tell CMake we're linking against the shared library (not header-only)
+set(Boost_USE_STATIC_LIBS OFF)
+set(Boost_USE_HEADER_ONLY OFF)
+find_package(Boost REQUIRED COMPONENTS system log)
+# Define BOOST_ALL_DYN_LINK project-wide to ensure all Boost libraries use dynamic linking
+add_compile_definitions(BOOST_ALL_DYN_LINK)
+
 find_package(PkgConfig REQUIRED)
 find_package(FLEX REQUIRED)
 find_package(BISON REQUIRED)

+# Find OpenCL 1.2 or higher: try find_package first, fall back to pkg-config
+find_package(OpenCL 1.2 QUIET)
+if(OpenCL_FOUND)
+	# Normalize find_package variables to match pkg_check_modules naming
+	set(OPENCL_FOUND TRUE)
+	set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS})
+	# Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural)
+	if(OpenCL_LIBRARIES)
+		set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
+	else()
+		set(OPENCL_LIBRARIES ${OpenCL_LIBRARY})
+	endif()
+	set(OPENCL_LIBRARY_DIRS "")
+	message(STATUS "Found OpenCL using find_package")
+
+	# Check if version is available and validate
+	if(OpenCL_VERSION)
+		if(OpenCL_VERSION VERSION_LESS "1.2")
+			message(FATAL_ERROR
+				"OpenCL version ${OpenCL_VERSION} found, but 1.2 or higher is required")
+		endif()
+		message(STATUS "OpenCL version: ${OpenCL_VERSION}")
+	else()
+		message(WARNING
+			"OpenCL version could not be determined. "
+			"Version 1.2+ is required at runtime.")
+	endif()
+else()
+	# Fall back to pkg-config
+	pkg_check_modules(OPENCL OpenCL)
+	if(NOT OPENCL_FOUND)
+		message(FATAL_ERROR
+			"Failed to find OpenCL: both find_package and "
+			"pkg_check_modules failed. Try installing the "
+			"'ocl-icd-opencl-dev' package (or the appropriate "
+			"OpenCL development package for your system)."
+		)
+	endif()
+	message(STATUS "Found OpenCL using pkg-config")
+	message(WARNING
+		"OpenCL version could not be determined via pkg-config. "
+		"Version 1.2+ is required at runtime.")
+endif()
+
 # Need dlopen() and dlsym()
 find_library(DL_LIBRARY NAMES dl ldl)
 if(NOT DL_LIBRARY)
    message(FATAL_ERROR "Dynamic linking library (libdl/libldl) not found")
 endif()

+# Add third-party dependencies
+if(ENABLE_TESTS)
+    add_subdirectory(third_party)
+endif()
+add_subdirectory(compile)
 # Add core components
 add_subdirectory(smocore)
 add_subdirectory(commonLibs)
-add_subdirectory(senseApis)
+add_subdirectory(stimBuffApis)
 add_subdirectory(wilzorApis)
+add_subdirectory(devices)

 # Main executable
 add_executable(salmanoff main.cpp)
 target_link_libraries(salmanoff
+	Boost::system Boost::log
    smocore
-    marionette
-    deviceManager
-    senseApis
-    ${Boost_LIBRARIES}
    ${DL_LIBRARY}
+    attachmentSupport
 )

+# Verify Boost dynamic dependencies after build
+add_custom_command(TARGET salmanoff POST_BUILD
+	COMMAND ${CMAKE_COMMAND} -DVERIFY_FILE="$<TARGET_FILE:salmanoff>"
+		-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VerifyBoostDynamic.cmake
+	COMMENT "Verifying Boost dynamic dependencies for salmanoff"
+)
+
+# Add all registered DAPSS targets as dependencies
+add_all_daps_dependencies()
+
+# Add tests if enabled
+if(ENABLE_TESTS)
+    enable_testing()
+    add_subdirectory(tests)
+endif()
+
 install(TARGETS salmanoff DESTINATION bin)
+
+# Install device configuration files (preprocessed .daps files)
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/devices/
+    DESTINATION share/salmanoff/devices
+    FILES_MATCHING PATTERN "*.daps"
+)
+
+# Install documentation
+install(FILES README.md DESTINATION share/doc/salmanoff)
+install(FILES LICENSE DESTINATION share/doc/salmanoff)
+
+# Install example configurations if they exist
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/examples")
+    install(DIRECTORY examples/ DESTINATION share/salmanoff/examples)
+endif()
+
+# Include CPack configuration
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CPackConfig.cmake)
+include(CPack)
@@ -0,0 +1,25 @@
+Copyright (c) 2024 Salmanoff Project. All rights reserved.
+
+PROPRIETARY SOFTWARE LICENSE
+
+This software and associated documentation files (the "Software") are
+proprietary and confidential. The Software is owned exclusively by the
+Salmanoff Project and is protected by copyright laws and international
+treaty provisions.
+
+NO LICENSE GRANTED. No person or entity is granted any rights or
+permissions to use, copy, modify, merge, publish, distribute, sublicense,
+sell, or otherwise transfer the Software or any portion thereof without
+explicit written permission from the Salmanoff Project.
+
+UNAUTHORIZED USE PROHIBITED. Any unauthorized use, reproduction, or
+distribution of the Software is strictly prohibited and may result in
+severe civil and criminal penalties.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,84 @@
+# Package Generation
+
+This project supports generating both Debian (.deb) and RPM (.rpm) packages
+using CPack.
+
+## Manual Package Generation
+
+### Prerequisites
+
+- CMake 3.16 or later
+- Make or Ninja build system
+- For RPM packages: `rpmbuild` utility
+
+### Build Process
+
+1. **Create build directory:**
+   ```bash
+   mkdir -p build-package
+   cd build-package
+   ```
+
+2. **Configure with CMake:**
+   ```bash
+   cmake .. -DCMAKE_BUILD_TYPE=Release
+   ```
+
+3. **Build the project:**
+   ```bash
+   make -j$(nproc)
+   ```
+
+4. **Generate packages:**
+   ```bash
+   cpack -G DEB    # Generate Debian package
+   cpack -G RPM    # Generate RPM package (requires rpmbuild)
+   ```
+
+### Requirements for RPM Generation
+
+To generate RPM packages, you need `rpmbuild` installed:
+
+- **Ubuntu/Debian**: `sudo apt-get install rpm`
+- **CentOS/RHEL**: `sudo yum install rpm-build`
+- **Fedora**: `sudo dnf install rpm-build`
+
+### Package Contents
+
+The generated packages include:
+
+- **Main executable**: `/usr/bin/salmanoff`
+- **Shared libraries**: `/usr/lib/lib*.so`
+- **Device configurations**: `/usr/share/salmanoff/devices/` (preprocessed
+  .daps files)
+- **Documentation**: `/usr/share/doc/salmanoff/`
+
+### Installing Packages
+
+**Debian/Ubuntu:**
+```bash
+sudo dpkg -i salmanoff-0.01.001-x86_64.deb
+```
+
+**CentOS/RHEL/Fedora:**
+```bash
+sudo rpm -i salmanoff-0.01.001-x86_64.rpm
+```
+
+### Package Configuration
+
+Package metadata and configuration are defined in
+`cmake/CPackConfig.cmake`. This includes:
+
+- Package name, version, and description
+- Dependencies and recommendations
+- License information
+- File naming conventions
+
+### Troubleshooting
+
+- **RPM generation fails**: Ensure `rpmbuild` is installed
+- **Missing dependencies**: Check that all build dependencies are
+  installed
+- **Permission errors**: Ensure you have write permissions in the build
+  directory
@@ -1,6 +1,8 @@
 # The Salmanoff Project:

-![Salmanoff project](docs/img/salmanoff-logo-dark-512.png)
+<p align="center">
+	<img src="docs/img/salmanoff-logo-512.png" alt="Salmanoff project logo" />
+</p>

 This project, Salmanoff (pronounced: Sal-man-off), is an ROS rewrite of the Harikoff project. The name is more reflective of the people whose ideas sparked the solutions in my mind. These people are:
 * Gregory `SAL`mieri.
@@ -8,3 +10,5 @@ This project, Salmanoff (pronounced: Sal-man-off), is an ROS rewrite of the Hari
 * Leonard Peik`OFF`.

 Would you like to know what this project is and does? Well, it's a secret! But you can find out by reading the code. Or you could just ask me. Or you could wait until I release it. But that's no fun.
+
+For package generation instructions, see [PACKAGING.md](PACKAGING.md).
@@ -0,0 +1,86 @@
+# Bug somehow related to either OpenClCollatingAndMeshingEngine or PcloudStimBuff:
+
+printSlotBytes: Slot 21 vaddr=0xfffff7fb4000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 22 vaddr=0xfffff7fb5000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 23 vaddr=0xfffff7fb6000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 24 vaddr=0xfffff7fb7000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 25 vaddr=0xfffff7fb8000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 26 vaddr=0xfffff7fb9000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 27 vaddr=0xfffff7fba000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 28 vaddr=0xfffff7fbb000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+printSlotBytes: Slot 29 vaddr=0xfffff7fbc000 (4 bytes):
+0000: 05 01 01 00                                      |....|
+produceFrameReq2_assembleDone: Successfully assembled frame 29 slots succeeded out of 30 total slots
+compactCollateAndMeshFrameReq: Started compact kernel
+startKernel: already running, call stop() first
+produceFrameReq3_compactCollateDone: Failed to compact and collate frame
+Mrntt: About to detach all sense devices.
+xcbWindow_detachDeviceReq: Detached X11 window device:
+Device Identifier: win0, Sensor Type: e, QualeIface API: visual-qualeiface, QualeIface API Params: (), StimBuff API: xcb, StimBuff API Params: (dev-substring ), Provider: xorg, Provider Params: (display=1 screen=0 ), Device Selector: mut
+
+enDisablePcloudDataReq2: Command timeout for device 3JEDK380010Z39
+detachDeviceReq1: Failed to disable pcloud data for stimbuff 3JEDK380010Z39
+stop: Stopped stimulus buffer for device 3JEDK380010Z39
+disconnectReq: Sent disconnect message to 10.42.0.139:65000
+detachDeviceReq2: Successfully detached pcloud stimbuff for device 3JEDK380010Z39 and possibly also destroyed device.
+Mrntt: Successfully detached 2 of 2 sense devices.
+Mrntt: About to finalize all stim buff api libs.
+stop: UDP Command Demuxer stopped
+stop: BroadcastListener stopped
+
+Thread 9 "rusticl queue t" received signal SIGSEGV, Segmentation fault.
+[Switching to Thread 0xffffca4ee140 (LWP 11695)]
+0x0000fffff48517b0 in std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)>::_Bind(std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&) (
+    this=0xffffdc000c70) at /usr/include/c++/13/functional:581
+581	      _Bind(const _Bind&) = default;
+(gdb) bt
+#0  0x0000fffff48517b0 in std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)>::_Bind(std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&) (this=0xffffdc000c70) at /usr/include/c++/13/functional:581
+#1  0x0000fffff4851818 in std::_Function_base::_Base_manager<std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> >::_M_create<std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&>(std::_Any_data&, std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&, std::integral_constant<bool, false>) (__dest=..., __f=...)
+    at /usr/include/c++/13/bits/std_function.h:161
+#2  0x0000fffff4850704 in std::_Function_base::_Base_manager<std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr--Type <RET> for more, q to quit, c to continue without paging--c
+<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> >::_M_init_functor<std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&>(std::_Any_data&, std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> const&) (__functor=..., __f=...) at /usr/include/c++/13/bits/std_function.h:215
+#3  0x0000fffff484fbf0 in std::_Function_base::_Base_manager<std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> >::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) (__dest=..., 
+    __source=..., __op=std::__clone_functor) at /usr/include/c++/13/bits/std_function.h:198
+#4  0x0000fffff484f0bc in std::_Function_handler<void (int), std::_Bind<void (smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq::*(smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq*, std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq>, int)> >::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) (__dest=..., 
+    __source=..., __op=std::__clone_functor) at /usr/include/c++/13/bits/std_function.h:282
+#5  0x0000fffff484f2b0 in std::function<void (int)>::function(std::function<void (int)> const&) (this=0xffffca4ecd40, __x=...) at /usr/include/c++/13/bits/std_function.h:391
+#6  0x0000fffff484e9c0 in std::_Bind<std::function<void (int)> (int)>::_Bind<int&>(std::function<void (int)> const&, int&) (this=0xffffca4ecd40, __f=...)
+    at /usr/include/c++/13/functional:572
+#7  0x0000fffff484e170 in std::bind<std::function<void (int)>&, int&>(std::function<void (int)>&, int&) (__f=...) at /usr/include/c++/13/functional:885
+#8  0x0000fffff484aa68 in smo::stim_buff::OpenClCollatingAndMeshingEngine::compactKernelEventCallback (event_command_exec_status=0, user_data=0xffffe4009e80)
+    at /home/latentprion/gits/salmanoff-git/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp:249
+#9  0x0000ffffcb3e34b4 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#10 0x0000ffffcb3d173c in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#11 0x0000ffffcb3d1d28 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#12 0x0000ffffcb3b0b34 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#13 0x0000ffffcb40886c in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#14 0x0000ffffcb39a728 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#15 0x0000ffffcb39a7b0 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#16 0x0000ffffcb3b0a40 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#17 0x0000ffffcb3b130c in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#18 0x0000ffffcb3d2dfc in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#19 0x0000ffffcb371148 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#20 0x0000ffffcb3f9b40 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#21 0x0000ffffcb3713c8 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#22 0x0000ffffcb378988 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#23 0x0000ffffcb37120c in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#24 0x0000ffffcb371000 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#25 0x0000ffffcb392888 in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#26 0x0000ffffcb45f23c in ?? () from /lib/aarch64-linux-gnu/libRusticlOpenCL.so.1
+#27 0x0000fffff7ac595c in start_thread (arg=0xfffff58cf880) at ./nptl/pthread_create.c:447
+#28 0x0000fffff7b2bb0c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone3.S:76
+(gdb) 
+
+## Race conditions in OClCollMeshEngn:
+
+engine not set up or invalid
+
+
@@ -0,0 +1,62 @@
+# CPack configuration for package generation
+# This file contains all CPack settings for generating deb and rpm packages
+
+# Set package metadata using project variables
+set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
+set(CPACK_PACKAGE_VERSION "${PROJECT_VERSION}")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
+    "Salmanoff - A sensor management and control system")
+set(CPACK_PACKAGE_VENDOR "Salmanoff Project")
+set(CPACK_PACKAGE_CONTACT "maintainer@salmanoff.org")
+
+# Set package description.
+# string(CONCAT) joins the fragments into a single string. Passing several
+# quoted arguments to set() would instead create a list, embedding ';'
+# separators between the fragments of the description text.
+string(CONCAT CPACK_PACKAGE_DESCRIPTION
+    "Salmanoff is a comprehensive sensor management and control system that\n"
+    "provides unified interfaces for various sensor devices including LiDAR\n"
+    "systems. It features modular architecture with support for multiple\n"
+    "device types, asynchronous processing, and real-time data handling."
+)
+
+# License information
+set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
+set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
+
+# Enable deb and rpm generators
+set(CPACK_GENERATOR "DEB;RPM")
+
+# DEB package specific settings (Ubuntu): maintainer, archive section and
+# priority written into the generated package's control metadata.
+set(CPACK_DEBIAN_PACKAGE_MAINTAINER
+    "Salmanoff Project <maintainer@salmanoff.org>")
+set(CPACK_DEBIAN_PACKAGE_SECTION "science")
+set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional")
+# Target Ubuntu distribution
+# NOTE(review): CPACK_DEBIAN_PACKAGE_DISTRIBUTION does not appear among the
+# variables documented for the CPack DEB generator - confirm it has any effect.
+set(CPACK_DEBIAN_PACKAGE_DISTRIBUTION "ubuntu")
+# Build dependencies (from builddeps file)
+# These are needed to build the package from source
+# NOTE(review): CPACK_DEBIAN_PACKAGE_BUILD_DEPENDS does not appear among the
+# documented CPack DEB variables either; presumably kept as a record of the
+# build requirements only - confirm.
+set(CPACK_DEBIAN_PACKAGE_BUILD_DEPENDS
+    "build-essential, cmake (>= 3.16), libboost-all-dev, flex, bison, ocl-icd-opencl-dev, liburing-dev")
+
+# Runtime dependencies (from builddeps file - runtime equivalents).
+# Boost is deliberately not pinned here: CPACK_DEBIAN_PACKAGE_SHLIBDEPS is
+# enabled in this file, so dpkg-shlibdeps appends the exact libboost-* packages
+# the binaries were linked against. Hard-coding 1.72-1.74 alternatives made the
+# package uninstallable on distributions that ship any other Boost release.
+set(CPACK_DEBIAN_PACKAGE_DEPENDS
+    "libc6, libstdc++6, ocl-icd-libopencl1 | libopencl1, liburing2 | liburing1")
+set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libxcb1, libx11-6")
+set(CPACK_DEBIAN_PACKAGE_SUGGESTS "livox-sdk")
+
+# RPM package specific settings
+set(CPACK_RPM_PACKAGE_LICENSE "Proprietary")
+set(CPACK_RPM_PACKAGE_GROUP "Applications/Engineering")
+set(CPACK_RPM_PACKAGE_URL "https://github.com/salmanoff/salmanoff")
+set(CPACK_RPM_PACKAGE_REQUIRES "boost-system >= 1.72.0, boost-log >= 1.72.0, glibc, libstdc++, ocl-icd, liburing")
+set(CPACK_RPM_PACKAGE_SUGGESTS "xcb, libX11, livox-sdk")
+
+# Package file naming using project variables
+set(CPACK_PACKAGE_FILE_NAME
+    "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
+
+# Enable automatic dependency detection for Debian packages
+# This uses dpkg-shlibdeps to automatically detect shared library dependencies
+set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
+
+# Enable component-based packaging for the DEB and RPM generators.
+# (These variables do not control package compression.)
+set(CPACK_DEB_COMPONENT_INSTALL ON)
+set(CPACK_RPM_COMPONENT_INSTALL ON)
@@ -0,0 +1,153 @@
+# DAPSS (Device Attachment Pipe Specification Source) preprocessing module
+# This module provides functionality to preprocess .dapss files to .daps files
+# using the C preprocessor, respecting include directories and target dependencies.
+#
+# Usage:
+#   add_daps_target(target_name SOURCES file1.dapss file2.dapss ...)
+#   register_daps_target(target_name)  # In subdirectories
+#   add_all_daps_dependencies()        # In main CMakeLists.txt
+#
+# Examples:
+#   add_daps_target(device_specs SOURCES devices/avia0.dapss devices/win0.dapss)
+#   register_daps_target(device_specs)
+#   add_all_daps_dependencies()
+#
+# The preprocessed .daps files will be placed in ${CMAKE_CURRENT_BINARY_DIR}/
+
+# Function to add a DAPSS preprocessing target
+# Usage: add_daps_target(target_name SOURCES file1.dapss file2.dapss ...)
+#
+# Arguments:
+#   target_name - name of the custom target to create
+#   SOURCES     - one or more .dapss files; relative paths are resolved
+#                 against CMAKE_CURRENT_SOURCE_DIR
+#
+# Results set in the caller's scope:
+#   <target_name>_OUTPUT_DIR   - directory holding the generated .daps files
+#   <target_name>_OUTPUT_FILES - list of generated .daps files
+#
+# NOTE(review): files pulled in via #include inside a .dapss file are not
+# tracked as dependencies; only the listed SOURCES trigger a re-run.
+function(add_daps_target target_name)
+    set(options)
+    set(oneValueArgs)
+    set(multiValueArgs SOURCES)
+    cmake_parse_arguments(DAPS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT DAPS_SOURCES)
+        message(FATAL_ERROR "add_daps_target: No SOURCES specified for target ${target_name}")
+    endif()
+
+    # Use binary directory directly for processed files
+    # This ensures files are created in the same directory as the target
+    set(output_dir "${CMAKE_CURRENT_BINARY_DIR}")
+
+    # Find C compiler if not already set. NAMES is required here: without it
+    # only the first word is a program name and the rest are search paths.
+    if(NOT CMAKE_C_COMPILER)
+        find_program(CMAKE_C_COMPILER NAMES gcc cc clang)
+        if(NOT CMAKE_C_COMPILER)
+            message(FATAL_ERROR "No C compiler found for DAPSS preprocessing")
+        endif()
+    endif()
+
+    # Include flags derived from the directory's INCLUDE_DIRECTORIES property.
+    # These are the same for every source, so build them once.
+    get_directory_property(include_dirs INCLUDE_DIRECTORIES)
+    set(include_flags)
+    foreach(include_dir ${include_dirs})
+        list(APPEND include_flags "-I${include_dir}")
+    endforeach()
+
+    # List to store all output files
+    set(output_files)
+
+    # Process each source file
+    foreach(source_file ${DAPS_SOURCES})
+        # Resolve the source to an absolute path so that both relative and
+        # absolute SOURCES entries work, and so -I does not depend on the
+        # working directory the build tool happens to use.
+        get_filename_component(source_path "${source_file}" ABSOLUTE
+            BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+        get_filename_component(base_name "${source_file}" NAME_WE)
+        get_filename_component(source_dir "${source_path}" DIRECTORY)
+
+        # Create output file path
+        set(output_file "${output_dir}/${base_name}.daps")
+        list(APPEND output_files "${output_file}")
+
+        # Preprocess with -o instead of a shell redirect: each argument is
+        # passed verbatim (paths with spaces are safe) and no empty output
+        # file is created by the shell when the compiler fails.
+        add_custom_command(
+            OUTPUT "${output_file}"
+            COMMAND "${CMAKE_C_COMPILER}" -E -P -x c
+                    ${include_flags} "-I${source_dir}"
+                    "${source_path}" -o "${output_file}"
+            DEPENDS "${source_path}"
+            COMMENT "Preprocessing ${source_file} to ${base_name}.daps"
+            VERBATIM
+        )
+    endforeach()
+
+    # Create custom target that depends on all output files
+    add_custom_target(${target_name} DEPENDS ${output_files})
+
+    # Make the target part of the ALL target so it gets built by default
+    # This ensures it gets built when building just this subdirectory
+    set_target_properties(${target_name} PROPERTIES
+        FOLDER "${CMAKE_CURRENT_SOURCE_DIR}"
+        EXCLUDE_FROM_ALL FALSE
+    )
+
+    # Record where the outputs live on the target itself
+    set_target_properties(${target_name} PROPERTIES
+        DAPS_OUTPUT_DIR ${output_dir}
+        DAPS_OUTPUT_FILES "${output_files}"
+    )
+
+    # Expose the same information to the calling scope
+    set(${target_name}_OUTPUT_DIR ${output_dir} PARENT_SCOPE)
+    set(${target_name}_OUTPUT_FILES "${output_files}" PARENT_SCOPE)
+endfunction()
+
+# Record a DAPSS target so that add_all_daps_dependencies() can wire it up later.
+# Usage: register_daps_target(target_name)
+# The name is appended to the DAPS_REGISTERED_TARGETS global property.
+function(register_daps_target target_name)
+    set_property(GLOBAL APPEND PROPERTY DAPS_REGISTERED_TARGETS ${target_name})
+    message(STATUS "Registered DAPSS target ${target_name} for later dependency addition")
+endfunction()
+
+# Attach every registered DAPSS target as a dependency of one main target.
+# Usage: add_all_daps_dependencies([TARGET main_target] [CONDITION condition_expression])
+#   TARGET    - target that receives the dependencies (defaults to PROJECT_NAME)
+#   CONDITION - optional expression evaluated with if(); when given and false,
+#               the registered targets are skipped
+# Call this from the main CMakeLists.txt after all subdirectories are processed.
+function(add_all_daps_dependencies)
+    cmake_parse_arguments(DAPS_ALL "" "TARGET;CONDITION" "" ${ARGN})
+
+    # Fall back to the project's own target when no TARGET was supplied
+    set(dep_target ${PROJECT_NAME})
+    if(DAPS_ALL_TARGET)
+        set(dep_target ${DAPS_ALL_TARGET})
+    endif()
+
+    get_property(pending_targets GLOBAL PROPERTY DAPS_REGISTERED_TARGETS)
+    if(NOT pending_targets)
+        message(STATUS "No DAPSS targets registered for dependency addition")
+        return()
+    endif()
+
+    foreach(daps_target ${pending_targets})
+        if(NOT TARGET ${daps_target})
+            message(WARNING "Registered DAPSS target ${daps_target} does not exist")
+        elseif(NOT DAPS_ALL_CONDITION)
+            # Unconditional registration
+            add_dependencies(${dep_target} ${daps_target})
+            message(STATUS "Added registered DAPSS target ${daps_target} as dependency of ${dep_target}")
+        elseif(${DAPS_ALL_CONDITION})
+            add_dependencies(${dep_target} ${daps_target})
+            message(STATUS "Added registered DAPSS target ${daps_target} as dependency of ${dep_target} (condition: ${DAPS_ALL_CONDITION})")
+        else()
+            message(STATUS "Skipped registered DAPSS target ${daps_target} (condition: ${DAPS_ALL_CONDITION} not met)")
+        endif()
+    endforeach()
+endfunction()
@@ -0,0 +1,25 @@
+# DebugOpts.cmake - Debug configuration options
+
+# Enable debug locking features
+option(ENABLE_DEBUG_LOCKS "Enable debug features for locking system" ON)
+
+# Enable callable tracing for debugging boost::asio post operations
+option(ENABLE_DEBUG_TRACE_CALLABLES "Enable callable tracing for debugging boost::asio post operations" OFF)
+
+# Qutex deadlock detection configuration
+# Always define the variable in cache so it appears in ccmake
+set(DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS 500 CACHE STRING
+    "Timeout in milliseconds for deadlock detection in qutex system")
+
+if(ENABLE_DEBUG_LOCKS)
+    # Validate the raw text first. math(EXPR) aborts configuration with an
+    # unrelated parse error on input such as "1.5" or "abc", so reject
+    # anything that is not a plain run of decimal digits up front.
+    if(NOT "${DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS}" MATCHES "^[0-9]+$")
+        message(FATAL_ERROR "DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS must be a positive integer > 0")
+    endif()
+
+    # Convert to integer and reject zero
+    math(EXPR timeout_int "${DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS}")
+    if(timeout_int LESS_EQUAL 0)
+        message(FATAL_ERROR "DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS must be a positive integer > 0")
+    endif()
+endif()
@@ -0,0 +1,63 @@
+# SMO_VERIFY_BOOST_DYNAMIC_DEPENDENCY
+# Verifies that a target file (executable or shared library) has Boost libraries
+# in its dynamic dependency list via ldd.
+#
+# Usage as function:
+#   SMO_VERIFY_BOOST_DYNAMIC_DEPENDENCY(<target_file>)
+#
+# Usage as script (with -P):
+#   cmake -DVERIFY_FILE=<target_file> -P VerifyBoostDynamic.cmake
+#
+# This function/script:
+#   1. Runs ldd on the target file
+#   2. Checks for boost libraries in the dependency list
+#   3. Reports success or failure with appropriate messages
+#
+# Public entry point: forwards target_file unchanged to the shared
+# implementation _verify_boost_dynamic_dependency().
+function(SMO_VERIFY_BOOST_DYNAMIC_DEPENDENCY target_file)
+	_verify_boost_dynamic_dependency("${target_file}")
+endfunction()
+
+# Shared implementation behind both the function form and script mode.
+# Runs ldd on target_file and reports through message() whether any
+# "libboost" entry shows up in the output. It only emits STATUS/WARNING
+# messages and returns early when the file is missing or ldd fails.
+function(_verify_boost_dynamic_dependency target_file)
+	set(tag "SMO_VERIFY_BOOST_DYNAMIC_DEPENDENCY")
+
+	if(NOT EXISTS "${target_file}")
+		message(WARNING "${tag}: Target file '${target_file}' does not exist")
+		return()
+	endif()
+
+	# Collect the dynamic dependency listing
+	execute_process(
+		COMMAND ldd "${target_file}"
+		RESULT_VARIABLE ldd_result
+		OUTPUT_VARIABLE ldd_output
+		ERROR_VARIABLE ldd_error
+	)
+
+	if(ldd_result)
+		message(WARNING "${tag}: Failed to run ldd on '${target_file}': ${ldd_error}")
+		return()
+	endif()
+
+	# Presence check is case-insensitive; the listing below uses the raw output
+	string(TOLOWER "${ldd_output}" ldd_output_lower)
+	string(FIND "${ldd_output_lower}" "libboost" boost_found)
+
+	if(NOT boost_found EQUAL -1)
+		# One regex match per ldd line that mentions a boost library
+		string(REGEX MATCHALL "libboost[^\n]*" boost_libs "${ldd_output}")
+		message(STATUS "${tag}: SUCCESS - Boost libraries found in '${target_file}':")
+		foreach(boost_lib ${boost_libs})
+			string(STRIP "${boost_lib}" boost_lib_stripped)
+			message(STATUS "  ${boost_lib_stripped}")
+		endforeach()
+	else()
+		message(STATUS "${tag}: WARNING - No Boost libraries found in dependencies of '${target_file}'")
+		message(STATUS "ldd output:")
+		message(STATUS "${ldd_output}")
+	endif()
+endfunction()
+
+# Script mode: if VERIFY_FILE is defined, run the verification
+if(VERIFY_FILE)
+	_verify_boost_dynamic_dependency("${VERIFY_FILE}")
+endif()
+
@@ -31,7 +31,7 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 # Search for libraries and headers in the target directories
 set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
 set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

 # Set pkg-config to use the cross-compiled libraries
 set(ENV{PKG_CONFIG_PATH} "/usr/aarch64-linux-gnu/lib/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig")
@@ -0,0 +1,91 @@
+# Clang toolchain file for native builds
+# This file should be used with cmake -DCMAKE_TOOLCHAIN_FILE=cmake/clang-native.cmake
+
+# Disable cross-compilation
+set(CMAKE_CROSSCOMPILING FALSE)
+
+# Target OS (native)
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
+
+# Specify the Clang compilers
+set(CMAKE_C_COMPILER clang)
+set(CMAKE_CXX_COMPILER clang++)
+
+# Set Clang-specific compiler flags
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pedantic")
+
+# Enable C++20 standard (as specified in main CMakeLists.txt)
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Set Clang-specific optimization flags
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -DNDEBUG")
+
+# Set debug flags
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g -O0")
+
+# Enable address sanitizer in debug builds (optional)
+# Uncomment the following lines if you want to enable address sanitizer
+# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fno-omit-frame-pointer")
+# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize=address -fno-omit-frame-pointer")
+# set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=address")
+# set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=address")
+
+# Enable undefined behavior sanitizer in debug builds (optional)
+# Uncomment the following lines if you want to enable UBSan
+# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined -fno-omit-frame-pointer")
+# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize=undefined -fno-omit-frame-pointer")
+# set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=undefined")
+# set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=undefined")
+
+# Set native search paths (use system defaults)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
+
+# Clang-specific linker flags
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld")
+
+# Enable link-time optimization in release builds (optional)
+# Uncomment the following lines if you want to enable LTO
+# set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto")
+# set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -flto")
+# set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -flto")
+# set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -flto")
+
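+# Use LLVM's libc++ as the C++ standard library (compile and link).
+# NOTE(review): distribution Boost packages are usually built against
+# libstdc++ - confirm the linked Boost libraries match this choice.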
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++")
+
+# Alternative: Use libstdc++ instead of libc++ (uncomment if preferred)
+# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
+# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libstdc++")
+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libstdc++")
+
+# Set compiler-specific features
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcolor-diagnostics")
+
+# Enable all warnings and treat them as errors in debug builds (optional)
+# Uncomment the following lines if you want to treat warnings as errors
+# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Werror")
+# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Werror")
+
+# Tune release builds for the build machine's CPU (binaries are not portable)
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -mtune=native")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -march=native -mtune=native")
+
+# Print configuration information
+message(STATUS "Clang toolchain configuration:")
+message(STATUS "  C Compiler: ${CMAKE_C_COMPILER}")
+message(STATUS "  CXX Compiler: ${CMAKE_CXX_COMPILER}")
+message(STATUS "  CXX Standard: ${CMAKE_CXX_STANDARD}")
+message(STATUS "  Build Type: ${CMAKE_BUILD_TYPE}")
+message(STATUS "  Cross-compiling: ${CMAKE_CROSSCOMPILING}")
@@ -0,0 +1,72 @@
+# Generic Flex/Yacc Generation Functions
+# This file provides reusable functions for generating C++ files from Flex/Bison sources
+
+# Function to generate Flex lexer files
+# Usage: generate_flex_lexer(OUTPUT_VAR INPUT_FILE [PREFIX] [HEADER_DEPENDENCY])
+# OUTPUT_VAR: Variable name to store the output file path
+# INPUT_FILE: Path to the .ll input file
+# PREFIX: Optional prefix for the generated files (defaults to basename of input file)
+# HEADER_DEPENDENCY: Optional header file that the lexer depends on (e.g., from Bison)
+#
+# Generates ${CMAKE_CURRENT_BINARY_DIR}/<PREFIX>.cc and <PREFIX>.hh.
+# Requires FLEX_EXECUTABLE to be set before the call.
+function(generate_flex_lexer OUTPUT_VAR INPUT_FILE)
+    if(NOT FLEX_EXECUTABLE)
+        message(FATAL_ERROR "generate_flex_lexer: FLEX_EXECUTABLE is not set")
+    endif()
+
+    get_filename_component(INPUT_BASENAME ${INPUT_FILE} NAME_WE)
+
+    if(ARGC GREATER 2)
+        set(PREFIX ${ARGV2})
+    else()
+        set(PREFIX ${INPUT_BASENAME})
+    endif()
+
+    set(LEX_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${PREFIX}.cc)
+    set(LEX_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${PREFIX}.hh)
+
+    # Set up dependencies
+    set(DEPENDENCIES ${INPUT_FILE})
+    if(ARGC GREATER 3)
+        list(APPEND DEPENDENCIES ${ARGV3})
+    endif()
+
+    # Both files are written by flex, so both are declared as outputs; this
+    # lets the build system clean the header and order consumers after it.
+    add_custom_command(
+        OUTPUT ${LEX_OUTPUT} ${LEX_HEADER}
+        DEPENDS ${DEPENDENCIES}
+        COMMAND ${FLEX_EXECUTABLE} --header-file=${LEX_HEADER} -o ${LEX_OUTPUT} ${INPUT_FILE}
+        COMMENT "Generating ${PREFIX}.cc from ${INPUT_FILE}"
+        VERBATIM
+    )
+
+    set(${OUTPUT_VAR} ${LEX_OUTPUT} PARENT_SCOPE)
+endfunction()
+
+# Function to generate Bison parser files
+# Usage: generate_bison_parser(OUTPUT_VAR HEADER_VAR INPUT_FILE [PREFIX])
+# OUTPUT_VAR: Variable name to store the output .cc file path
+# HEADER_VAR: Variable name to store the output .hh file path
+# INPUT_FILE: Path to the .yy input file
+# PREFIX: Optional prefix for the generated files (defaults to basename of input file)
+#
+# PREFIX is also passed to bison via -p as the symbol name prefix.
+# Requires BISON_EXECUTABLE to be set before the call.
+function(generate_bison_parser OUTPUT_VAR HEADER_VAR INPUT_FILE)
+    if(NOT BISON_EXECUTABLE)
+        message(FATAL_ERROR "generate_bison_parser: BISON_EXECUTABLE is not set")
+    endif()
+
+    get_filename_component(INPUT_BASENAME ${INPUT_FILE} NAME_WE)
+
+    if(ARGC GREATER 3)
+        set(PREFIX ${ARGV3})
+    else()
+        set(PREFIX ${INPUT_BASENAME})
+    endif()
+
+    set(YACC_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${PREFIX}.cc)
+    set(YACC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${PREFIX}.hh)
+
+    # --defines=FILE is the spelling accepted by both old and new Bison
+    # releases; --header=FILE only exists from Bison 3.8 onwards.
+    add_custom_command(
+        OUTPUT ${YACC_OUTPUT} ${YACC_HEADER}
+        DEPENDS ${INPUT_FILE}
+        COMMAND ${BISON_EXECUTABLE} -p ${PREFIX} --defines=${YACC_HEADER} -o ${YACC_OUTPUT} ${INPUT_FILE}
+        COMMENT "Generating ${PREFIX}.cc and ${PREFIX}.hh from ${INPUT_FILE}"
+        VERBATIM
+    )
+
+    set(${OUTPUT_VAR} ${YACC_OUTPUT} PARENT_SCOPE)
+    set(${HEADER_VAR} ${YACC_HEADER} PARENT_SCOPE)
+endfunction()
+
+# Generate device attachment parser files using the generic functions
+# Generate Bison parser first (creates the header file)
+generate_bison_parser(YACC_OUTPUT YACC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/deviceManager/deviceAttachmentPipeSpecp.yy deviceAttachmentPipeSpecp)
+
+# Generate Flex lexer with dependency on Bison header
+generate_flex_lexer(LEX_OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/deviceManager/deviceAttachmentPipeSpecl.ll deviceAttachmentPipeSpecl ${YACC_HEADER})
@@ -0,0 +1,70 @@
+# ----------------------------------------------------------------------------------
+# MANDATORY USER VARIABLE
+# ----------------------------------------------------------------------------------
+# IMPORTANT: This variable MUST be set when running CMake to specify where the
+# laptop's sysroot (the root directory of the mounted laptop filesystem) is located.
+#
+# Usage example: cmake -DCMAKE_TOOLCHAIN_FILE=laptop_x86_sysroot.cmake
+#                      -DTARGET_SYSROOT=/mnt/laptop_sysroot/ <path_to_source>
+#
+# If the variable is not defined, we fall back to a common system root path for safety.
+# NOTE(review): the fallback below is a library directory, not a filesystem
+# root; confirm whether a real sysroot (e.g. "/") was intended.
+if(NOT DEFINED TARGET_SYSROOT)
+    set(TARGET_SYSROOT "/usr/lib/x86_64-linux-gnu")
+    message(STATUS "TARGET_SYSROOT not explicitly defined. Defaulting to ${TARGET_SYSROOT}")
+endif()
+message(STATUS "Using TARGET_SYSROOT: ${TARGET_SYSROOT}")
+
+# This toolchain file is evaluated again inside every try_compile() project,
+# where -DTARGET_SYSROOT from the command line is not visible. Forward the
+# variable so compiler checks use the same sysroot as the main build.
+list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES TARGET_SYSROOT)
+
+set(TARGET_TRIPLE x86_64-linux-gnu) # Standard Debian/Ubuntu triple
+
+# ----------------------------------------------------------------------------------
+# SYSROOT and COMPILER CONFIGURATION
+# ----------------------------------------------------------------------------------
+
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_SYSROOT ${TARGET_SYSROOT})
+message(STATUS "Using CMAKE_SYSROOT: ${CMAKE_SYSROOT}")
+
+# The CMAKE_FIND_ROOT_PATH tells CMake where to look for programs, libraries, etc.
+set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT})
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# 1. Architecture and Platform Identification
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR x86_64)
+
+set(CMAKE_C_COMPILER   ${TARGET_TRIPLE}-gcc)
+set(CMAKE_CXX_COMPILER ${TARGET_TRIPLE}-g++)
+
+# ----------------------------------------------------------------------------------
+# PKG-CONFIG CONFIGURATION (CRUCIAL FOR CROSS-COMPILING)
+# ----------------------------------------------------------------------------------
+
+# 1. Define the search path for .pc files, relative to the sysroot.
+# This ensures we look in the target's standard pkgconfig locations.
+set(PKG_CONFIG_SEARCH_PATHS
+    "${CMAKE_SYSROOT}/usr/lib/${TARGET_TRIPLE}/pkgconfig" # Primary location on Debian/Ubuntu
+    "${CMAKE_SYSROOT}/usr/share/pkgconfig"               # Secondary shared location
+    "${CMAKE_SYSROOT}/usr/lib/pkgconfig"                 # Another common location
+)
+
+# Join the paths using the system's path separator (colon on Linux)
+string(REPLACE ";" ":" PKG_CONFIG_LIBDIR_STRING "${PKG_CONFIG_SEARCH_PATHS}")
+
+# 2. Point pkg-config at the target instead of the host:
+#    - PKG_CONFIG_SYSROOT_DIR: prefix pkg-config prepends to paths found in .pc files
+#    - PKG_CONFIG_LIBDIR: the directories searched for the x86_64 .pc files
+#    - PKG_CONFIG_PATH: cleared so no additional host directories are searched
+set(ENV{PKG_CONFIG_SYSROOT_DIR} ${CMAKE_SYSROOT})
+set(ENV{PKG_CONFIG_LIBDIR} ${PKG_CONFIG_LIBDIR_STRING})
+set(ENV{PKG_CONFIG_PATH} "")
+
+message(STATUS "PKG_CONFIG_SYSROOT_DIR set to: ${CMAKE_SYSROOT}")
+message(STATUS "PKG_CONFIG_LIBDIR set to: ${PKG_CONFIG_LIBDIR_STRING}")
+
+# ----------------------------------------------------------------------------------
+# CMAKE FIND BEHAVIOR
+# ----------------------------------------------------------------------------------
+
@@ -1 +1,3 @@
 add_subdirectory(xcbXorg)
+add_subdirectory(livoxProto1)
+add_subdirectory(attachmentSupport)
@@ -0,0 +1,26 @@
+add_library(attachmentSupport SHARED
+	compute.cpp
+	stimulusProducer.cpp
+	stagingBuffer.cpp
+)
+
+target_include_directories(attachmentSupport PUBLIC
+	${Boost_INCLUDE_DIRS}
+	${CMAKE_SOURCE_DIR}/include
+	${CMAKE_BINARY_DIR}/include
+)
+
+target_link_libraries(attachmentSupport PUBLIC
+	Boost::system
+	Boost::log
+)
+
+# Verify Boost dynamic dependencies after build.
+# VERBATIM makes CMake quote each argument for the build tool, so the path is
+# passed as a single unquoted argument instead of carrying literal quotes.
+add_custom_command(TARGET attachmentSupport POST_BUILD
+	COMMAND ${CMAKE_COMMAND} -DVERIFY_FILE=$<TARGET_FILE:attachmentSupport>
+		-P ${CMAKE_SOURCE_DIR}/cmake/VerifyBoostDynamic.cmake
+	COMMENT "Verifying Boost dynamic dependencies for attachmentSupport"
+	VERBATIM
+)
+
+# Install rules
+install(TARGETS attachmentSupport DESTINATION lib)
@@ -0,0 +1,147 @@
+#include <user/compute.h>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <string_view>
+
+namespace smo {
+namespace compute {
+
+// Helper function to parse OpenCL version string.
+//
+// Extracts the first "<major>.<minor>" pair found in versionStr. Any textual
+// prefix before the first digit is skipped, so both "OpenCL 3.0 <vendor>"
+// and "OpenCL C 1.2 <vendor>" are accepted. Text after the minor number is
+// ignored.
+//
+// Returns {major, minor}, or {-1, -1} when no such pair can be parsed.
+static std::pair<int, int> parseOpenClVersion(const std::string& versionStr)
+{
+	static const char* const digits = "0123456789";
+
+	// Start of the major number: first digit anywhere in the string
+	const size_t majorPos = versionStr.find_first_of(digits);
+	if (majorPos == std::string::npos) { return {-1, -1}; }
+
+	// The run of major digits must be terminated by a dot
+	const size_t dotPos = versionStr.find_first_not_of(digits, majorPos);
+	if (dotPos == std::string::npos || versionStr[dotPos] != '.')
+		{ return {-1, -1}; }
+
+	// At least one digit must follow the dot
+	const size_t minorPos = dotPos + 1;
+	if (minorPos >= versionStr.size()
+		|| versionStr[minorPos] < '0' || versionStr[minorPos] > '9')
+		{ return {-1, -1}; }
+
+	try {
+		int major = std::stoi(versionStr.substr(majorPos, dotPos - majorPos));
+		// stoi stops at the first non-digit, so trailing vendor text is fine
+		int minor = std::stoi(versionStr.substr(minorPos));
+		return {major, minor};
+	} catch (const std::exception&) {
+		// Out-of-range component
+		return {-1, -1};
+	}
+}
+
+// Implementation of validateOpenClVersion (declared in user/compute.h).
+// Parses versionStr and checks it against the minimum minMajor.minMinor.
+// Logs the outcome (std::cerr on failure, std::cout on success) using
+// versionType as a label and returns whether the version is acceptable.
+bool validateOpenClVersion(
+	std::string_view versionStr, std::string_view versionType,
+	int minMajor, int minMinor)
+{
+	const std::pair<int, int> found =
+		parseOpenClVersion(std::string(versionStr));
+	const std::pair<int, int> unparsable{-1, -1};
+
+	if (found == unparsable)
+	{
+		std::cerr << __func__ << ": failed to parse OpenCL " << versionType
+			<< " version: " << versionStr << std::endl;
+		return false;
+	}
+
+	// Pair comparison is lexicographic: major first, then minor
+	const std::pair<int, int> required{minMajor, minMinor};
+	if (found < required)
+	{
+		std::cerr << __func__ << ": OpenCL " << versionType << " version "
+			<< found.first << "." << found.second << " found, but "
+			<< minMajor << "." << minMinor << " or higher is required"
+			<< std::endl;
+		return false;
+	}
+
+	std::cout << __func__ << ": OpenCL " << versionType << " version: "
+		<< versionStr << std::endl;
+	return true;
+}
+
+// Creates an OpenCL context and a command queue for a single device.
+// Throws std::runtime_error (message includes the OpenCL error code) if
+// either object cannot be created; a context created before a command-queue
+// failure is released before throwing.
+ComputeDevice::ComputeDevice(cl_platform_id platformId, cl_device_id deviceId)
+: platform(platformId), device(deviceId),
+context(nullptr), commandQueue(nullptr)
+{
+	cl_int err;
+
+	// Create context for this device
+	context = clCreateContext(
+		nullptr, 1, &device,
+		nullptr, nullptr, &err);
+
+	if (err != CL_SUCCESS || !context)
+	{
+		throw std::runtime_error(
+			std::string(__func__) + ": failed to create context for device: " +
+			std::to_string(err));
+	}
+
+	// Create command queue with no properties set (queueProps == 0)
+	// NOTE(review): clCreateCommandQueue is deprecated as of OpenCL 2.0 -
+	// confirm the targeted OpenCL version before changing it.
+	cl_command_queue_properties queueProps = 0;
+	commandQueue = clCreateCommandQueue(
+		context, device, queueProps, &err);
+
+	if (err != CL_SUCCESS || !commandQueue)
+	{
+		// Undo the context creation so nothing is left allocated on failure
+		clReleaseContext(context);
+		context = nullptr;
+		throw std::runtime_error(
+			std::string(__func__) + ": failed to create command queue for "
+			"device: " + std::to_string(err));
+	}
+}
+
+// Wraps one host memory region as an OpenCL buffer in each device's context.
+//
+// hostPtr/size describe the host region; flags are OR-ed with
+// CL_MEM_USE_HOST_PTR for every clCreateBuffer call. Entries in devices that
+// are null or have no context are skipped. If any buffer creation fails, the
+// buffers created so far are released and std::runtime_error is thrown.
+ClBuffer::ClBuffer(void* hostPtr, size_t size, cl_mem_flags flags,
+	const std::vector<std::shared_ptr<ComputeDevice>>& devices)
+	: hostPtr(hostPtr), size(size), flags(flags)
+{
+	associations.reserve(devices.size());
+
+	// Create a buffer for each device's context
+	for (const auto& device : devices)
+	{
+		// A null entry would otherwise be dereferenced below
+		if (!device || !device->context) { continue; }
+
+		cl_int err;
+		cl_mem_flags bufferFlags = CL_MEM_USE_HOST_PTR | flags;
+		cl_mem buffer = clCreateBuffer(
+			device->context,
+			bufferFlags,
+			size, hostPtr,
+			&err);
+
+		if (err != CL_SUCCESS || !buffer)
+		{
+			// Release any buffers already created before throwing
+			for (auto& assoc : associations)
+			{
+				if (assoc.buffer) {
+					clReleaseMemObject(assoc.buffer);
+				}
+			}
+			throw std::runtime_error(
+				std::string(__func__) + ": failed to create buffer for "
+				"device: " + std::to_string(err));
+		}
+
+		associations.emplace_back(buffer, device);
+	}
+}
+
+// Looks up the cl_mem handle that was created for the given device.
+// Throws std::invalid_argument when device is null; returns nullptr when no
+// association for that device exists.
+cl_mem ClBuffer::getAssociatedBufferHandleForDevice(
+	const std::shared_ptr<ComputeDevice>& device) const
+{
+	if (!device)
+	{
+		throw std::invalid_argument(std::string(__func__)
+			+ ": device is nullptr");
+	}
+
+	// Linear scan; match is by shared_ptr identity
+	for (auto it = associations.begin(); it != associations.end(); ++it)
+	{
+		if (it->device == device) { return it->buffer; }
+	}
+
+	return nullptr;
+}
+
+} // namespace compute
+} // namespace smo
@@ -0,0 +1,236 @@
+#include <user/stagingBuffer.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <system_error>
+#include <vector>
+
+#include <user/frameAssemblyDesc.h>
+
+namespace smo {
+namespace stim_buff {
+
+// Static defaults for io_uring.
+// Page size is queried via sysconf(_SC_PAGE_SIZE) when this object is
+// initialized.
+const StagingBuffer::IOEngineConstraints
+StagingBuffer::IOEngineConstraints::ioUringConstraints(
+	// slotStartAlignmentByteVal (page alignment for DMA)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
+	// slotPadToNBytes (MTU 1500 - UDP/IP header 28)
+	1472,
+	// frameStartAlignmentByteVal (page alignment for DMA)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
+	// framePadToNBytes (one page; note this is the page size, not the MTU payload)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
+);
+
+// Static defaults for OpenCL input.
+// Page size is queried via sysconf(_SC_PAGE_SIZE) when this object is
+// initialized.
+const StagingBuffer::IOEngineConstraints
+StagingBuffer::IOEngineConstraints::openClInputConstraints(
+	// slotStartAlignmentByteVal (page alignment)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
+	// slotPadToNBytes (XYZI point size)
+	16,
+	// frameStartAlignmentByteVal (page alignment)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
+	// framePadToNBytes (one page; note this is the page size, not a pointer size)
+	static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
+);
+
+// Helper function to calculate the alignment needed for the first slot.
+//
+// The first slot is also the start of the frame, so its address must be a
+// multiple of BOTH alignments. The smallest value with that property is the
+// least common multiple; for the usual power-of-two alignments this equals
+// the larger of the two.
+//
+// If either alignment is 0 the larger value is returned unchanged (a zero
+// alignment has no meaningful common multiple).
+static size_t calculateMaxAlignment(
+	size_t frameStartAlignmentByteVal,
+	size_t slotStartAlignmentByteVal)
+{
+	if (frameStartAlignmentByteVal == 0 || slotStartAlignmentByteVal == 0)
+	{
+		return std::max(
+			frameStartAlignmentByteVal, slotStartAlignmentByteVal);
+	}
+
+	return std::lcm(frameStartAlignmentByteVal, slotStartAlignmentByteVal);
+}
+
+// Derives slotStrideNBytes and bufferNBytes from inputConstraints and nSlots.
+// The stride is the larger of slot alignment and slot padding, rounded up to
+// a multiple of the slot alignment. The buffer holds nSlots strides plus
+// worst-case alignment slack, is at least as large as the bigger padding
+// value, and is rounded up to a multiple of the combined alignment.
+void StagingBuffer::computeSlotStrideAndBufferSize()
+{
+	// Round value up to the next multiple of `multiple`
+	const auto roundUpTo = [](size_t value, size_t multiple) -> size_t {
+		return ((value + multiple - 1) / multiple) * multiple;
+	};
+
+	const size_t slotAlign = inputConstraints.slotStartAlignmentByteVal;
+	const size_t slotPad = inputConstraints.slotPadToNBytes;
+	const size_t frameAlign = inputConstraints.frameStartAlignmentByteVal;
+	const size_t framePad = inputConstraints.framePadToNBytes;
+
+	slotStrideNBytes = roundUpTo(std::max(slotAlign, slotPad), slotAlign);
+
+	// Alignment the first slot must satisfy (frame and slot alignment)
+	const size_t combinedAlign = calculateMaxAlignment(frameAlign, slotAlign);
+
+	// Lower bound on the buffer size regardless of slot count
+	const size_t floorNBytes = std::max(framePad, slotPad);
+
+	// Worst-case offset to reach an aligned address, plus all slots
+	const size_t neededNBytes =
+		(combinedAlign - 1) + nSlots * slotStrideNBytes;
+
+	bufferNBytes = roundUpTo(
+		std::max(neededNBytes, floorNBytes), combinedAlign);
+}
+
+// Static member function to calculate offset and validate invariants.
+//
+// Computes the byte offset from `buffer` to the first address that is a
+// multiple of the combined frame/slot alignment, then checks that:
+//   - the first slot address satisfies the frame start alignment,
+//   - the first slot address satisfies the slot start alignment,
+//   - bufferNBytes is at least max(framePadToNBytes, slotPadToNBytes),
+//   - offset + nSlots * slotStrideNBytes fits inside bufferNBytes.
+// Returns the offset; throws std::runtime_error on the first check that fails.
+size_t StagingBuffer::calculateFirstSlotOffsetAndValidate(
+	uint8_t* buffer,
+	size_t bufferNBytes,
+	size_t nSlots,
+	size_t slotStrideNBytes,
+	const StagingBuffer::IOEngineConstraints& inputConstraints)
+{
+	// Calculate maximum alignment needed for first slot
+	size_t maxAlignment = calculateMaxAlignment(
+		inputConstraints.frameStartAlignmentByteVal,
+		inputConstraints.slotStartAlignmentByteVal);
+
+	// Calculate offset to align first slot to both frame and slot alignment
+	// (round the buffer address up to the next multiple of maxAlignment)
+	uintptr_t bufferAddr = reinterpret_cast<uintptr_t>(buffer);
+	uintptr_t alignedAddr = ((bufferAddr + maxAlignment - 1) / maxAlignment)
+		* maxAlignment;
+	size_t firstSlotOffsetNBytes = alignedAddr - bufferAddr;
+
+	// Validate invariants with exceptions
+	uint8_t* firstSlotAddr = buffer + firstSlotOffsetNBytes;
+	// Frame start alignment
+	if (
+		reinterpret_cast<uintptr_t>(firstSlotAddr)
+		% inputConstraints.frameStartAlignmentByteVal != 0)
+	{
+		throw std::runtime_error(std::string(__func__)
+			+ ": StagingBuffer: first slot address not aligned to "
+			+ std::to_string(inputConstraints.frameStartAlignmentByteVal));
+	}
+
+	// Slot start alignment
+	if (
+		reinterpret_cast<uintptr_t>(firstSlotAddr)
+		% inputConstraints.slotStartAlignmentByteVal != 0)
+	{
+		throw std::runtime_error(std::string(__func__)
+			+ ": StagingBuffer: first slot address not aligned to "
+			+ std::to_string(inputConstraints.slotStartAlignmentByteVal));
+	}
+
+	// Minimum total size
+	size_t minBufferSize = std::max(
+		inputConstraints.framePadToNBytes,
+		inputConstraints.slotPadToNBytes);
+	if (bufferNBytes < minBufferSize)
+	{
+		throw std::runtime_error(std::string(__func__)
+			+ ": StagingBuffer: buffer size less than minimum required (max of "
+			+ std::to_string(inputConstraints.framePadToNBytes)
+			+ " and "
+			+ std::to_string(inputConstraints.slotPadToNBytes)
+			+ ")");
+	}
+
+	// All slots, starting at the aligned offset, must fit in the buffer
+	if (firstSlotOffsetNBytes + nSlots * slotStrideNBytes
+		> bufferNBytes)
+	{
+		throw std::runtime_error(std::string(__func__)
+			+ ": StagingBuffer: buffer size insufficient to hold "
+			+ std::to_string(nSlots)
+			+ " slots with proper alignment and padding");
+	}
+
+	return firstSlotOffsetNBytes;
+}
+
+// Allocates and locks the staging buffer and builds its FrameAssemblyDesc.
+//
+// inputEngineConstraints_ drives slot stride, alignment and buffer size; the
+// output-engine constraints parameter is currently unused. nSlots must be > 0.
+//
+// Throws:
+//   std::invalid_argument - nSlots == 0
+//   std::system_error     - mmap() or mlock() failed (carries errno; derives
+//                           from std::runtime_error)
+//   std::runtime_error    - layout invariants violated (see
+//                           calculateFirstSlotOffsetAndValidate)
+StagingBuffer::StagingBuffer(
+	const IOEngineConstraints& inputEngineConstraints_,
+	const IOEngineConstraints& /*outputEngineConstraints*/,
+	size_t nSlots)
+: buffer(nullptr, MmapDeleter(0)), bufferNBytes(0),
+nSlots(nSlots), slotStrideNBytes(0),
+firstSlotOffsetNBytes(0),
+inputConstraints(inputEngineConstraints_),
+assemblingFlag(false)
+{
+	if (nSlots == 0)
+	{
+		throw std::invalid_argument(std::string(__func__)
+			+ ": StagingBuffer: nSlots must be > 0");
+	}
+
+	computeSlotStrideAndBufferSize();
+
+	/* Allocate buffer using mmap() for io_uring registration
+	 * MAP_ANONYMOUS | MAP_PRIVATE creates anonymous, non-file-backed memory
+	 */
+	void* mmapped = mmap(
+		nullptr, bufferNBytes,
+		PROT_READ | PROT_WRITE,
+		MAP_ANONYMOUS | MAP_PRIVATE,
+		-1, 0);
+
+	if (mmapped == MAP_FAILED)
+	{
+		// Capture errno before any other call can overwrite it
+		const int mmapErrno = errno;
+		throw std::system_error(mmapErrno, std::generic_category(),
+			std::string(__func__) + ": StagingBuffer: mmap() failed");
+	}
+
+	// From here on the mapping is owned by `buffer` and unmapped on unwind
+	buffer = std::unique_ptr<uint8_t, MmapDeleter>(
+		static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));
+	currentNBytes.store(0);
+
+	// Lock the buffer in memory to prevent swapping
+	if (mlock(buffer.get(), bufferNBytes) != 0)
+	{
+		// errno tells the caller why locking failed (the mlock limit, etc.)
+		const int mlockErrno = errno;
+		throw std::system_error(mlockErrno, std::generic_category(),
+			std::string(__func__) + ": StagingBuffer: mlock() failed");
+	}
+
+	// Calculate offset and validate invariants (helper function in .cpp)
+	firstSlotOffsetNBytes = StagingBuffer::calculateFirstSlotOffsetAndValidate(
+		buffer.get(), bufferNBytes, nSlots,
+		slotStrideNBytes, inputConstraints);
+
+	// Build FrameAssemblyDesc once: one descriptor per slot, each giving the
+	// slot's offset from the frame base, its address and its padded size
+	std::vector<FrameAssemblyDesc::SlotDesc> slots;
+	slots.reserve(nSlots);
+	uint8_t *frameBase = buffer.get() + firstSlotOffsetNBytes;
+	for (size_t i = 0; i < nSlots; ++i)
+	{
+		size_t off = i * slotStrideNBytes;
+		FrameAssemblyDesc::SlotDesc s{
+			off, frameBase + off, inputConstraints.slotPadToNBytes};
+
+		slots.push_back(s);
+	}
+
+	frameDesc = std::make_shared<FrameAssemblyDesc>(
+		nSlots, inputConstraints.slotPadToNBytes, bufferNBytes,
+		std::move(slots));
+}
+
+} // namespace stim_buff
+} // namespace smo
@@ -0,0 +1,184 @@
+#include <boostAsioLinkageFix.h>
+#include <config.h>
+#include <iostream>
+#include <chrono>
+#include <algorithm>
+#include <boost/asio/io_service.hpp>
+#include <boost/asio/deadline_timer.hpp>
+#include <boost/system/error_code.hpp>
+#include <opts.h>
+#include <componentThread.h>
+#include <spinLock.h>
+#include <user/stimulusProducer.h>
+#include <user/stimulusBuffer.h>
+
+namespace smo {
+namespace stim_buff {
+
+/**	EXPLANATION:
+ * Looks up the attached StimulusBuffer whose deviceAttachmentSpec compares
+ * equal to `spec`.
+ *
+ * Entries that are null, or whose deviceAttachmentSpec is null, are skipped.
+ * A null `spec` cannot match anything, so it yields nullptr instead of being
+ * dereferenced.
+ *
+ * @param spec Attachment spec to search for; may be null.
+ * @return The first matching buffer, or nullptr if there is none.
+ */
+std::shared_ptr<StimulusBuffer> StimulusProducer::getAttachedStimulusBuffer(
+	const std::shared_ptr<device::DeviceAttachmentSpec>& spec) const
+{
+	// Dereferencing a null spec in the comparison below would be UB.
+	if (!spec) { return nullptr; }
+
+	for (const auto& buffer : attachedStimulusBuffers)
+	{
+		if (buffer && buffer->deviceAttachmentSpec &&
+			*buffer->deviceAttachmentSpec == *spec)
+		{
+			return buffer;
+		}
+	}
+
+	return nullptr;
+}
+
+/**	EXPLANATION:
+ * Reports whether any attached stimulus buffer's deviceAttachmentSpec names
+ * the given quale interface API.
+ *
+ * @param qualeIfaceApi API name to look for.
+ * @return true on the first match, false if no entry matches.
+ * @throws std::runtime_error if a null buffer, or a buffer with a null
+ *	deviceAttachmentSpec, is encountered before a match is found.
+ */
+bool StimulusProducer::hasBufferWithQualeIfaceApi(
+	const std::string& qualeIfaceApi) const
+{
+	for (const auto& attached : attachedStimulusBuffers)
+	{
+		const bool entryIsUsable = attached && attached->deviceAttachmentSpec;
+		if (!entryIsUsable)
+		{
+			throw std::runtime_error(
+				"StimulusProducer::hasBufferWithQualeIfaceApi: encountered "
+				"null buffer or null deviceAttachmentSpec in "
+				"attachedStimulusBuffers (should never happen)");
+		}
+
+		if (attached->deviceAttachmentSpec->qualeIfaceApi == qualeIfaceApi)
+			{ return true; }
+	}
+
+	return false;
+}
+
+/**	EXPLANATION:
+ * Detaches `buffer` from this producer by erasing its first occurrence in
+ * attachedStimulusBuffers. A null argument, or a buffer that isn't attached,
+ * is silently ignored.
+ */
+void StimulusProducer::destroyAttachedStimulusBuffer(
+	const std::shared_ptr<StimulusBuffer>& buffer)
+{
+	if (buffer == nullptr)
+		{ return; }
+
+	const auto listEnd = attachedStimulusBuffers.end();
+	const auto pos = std::find(
+		attachedStimulusBuffers.begin(), listEnd, buffer);
+
+	if (pos == listEnd)
+		{ return; }
+
+	attachedStimulusBuffers.erase(pos);
+}
+
+void StimulusProducer::stop()
+{	/**	EXPLANATION:
+	 * Requests shutdown of the periodic producer: clears shouldContinue
+	 * under shouldContinueLock, then cancels the timer. onTimeout() and
+	 * scheduleNextTimeout() both test shouldContinue before doing any
+	 * further work, so no new timeout is armed after this point.
+	 *
+	 * NOTE(review): deviceAttachmentSpec is dereferenced unchecked in the
+	 * log line below -- confirm it can never be null here.
+	 */
+	{
+		SpinLock::Guard lock(shouldContinueLock);
+		shouldContinue = false;
+	}
+
+	// Cancel timer immediately
+	timer.cancel();
+
+	std::cout << __func__ << ": Stopped stimulus producer for device "
+		<< deviceAttachmentSpec->deviceSelector << std::endl;
+}
+
+/**	EXPLANATION:
+ * Arms the timer to fire onTimeout() after `delayMs` milliseconds. Does
+ * nothing once shouldContinue has been cleared.
+ *
+ * @param delayMs Delay before the next onTimeout() call, in milliseconds.
+ */
+void StimulusProducer::scheduleNextTimeout(int delayMs)
+{
+	if (shouldContinue)
+	{
+		const auto delay = boost::posix_time::milliseconds(delayMs);
+		timer.expires_from_now(delay);
+
+		timer.async_wait(
+			[this](const boost::system::error_code& error)
+			{
+				onTimeout(error);
+			});
+	}
+}
+
+void StimulusProducer::onTimeout(const boost::system::error_code& error)
+{	/**	EXPLANATION:
+	 * Timer completion handler armed by scheduleNextTimeout(). Each
+	 * successful invocation either starts production of one stimframe or
+	 * records a deferral, and then re-arms the timer.
+	 *
+	 * On a cancelled timer or any other timer error this returns without
+	 * re-arming, so the periodic chain ends there.
+	 *
+	 * The shouldContinueLock guard below is declared at function scope, so
+	 * (assuming SpinLock::Guard is a scoped lock) it stays held across
+	 * stimFrameProductionTimesliceInd(), scheduleNextTimeout() and the
+	 * logging that follows -- see the FIXME near the end.
+	 */
+	// Timer was cancelled, which is expected when stopping
+	if (error == boost::asio::error::operation_aborted) {
+		return;
+	}
+
+	if (error)
+	{
+		std::cerr << "StimulusProducer: Timer error: " << error.message()
+			<< std::endl;
+		return;
+	}
+
+	SpinLock::Guard lock(shouldContinueLock);
+	if (!shouldContinue)
+		{ return; }
+
+	/**	EXPLANATION:
+	 * We need to ensure that there's only ever one stimframe being produced
+	 * during any CONFIG_STIMBUFF_FRAME_PERIOD_MS period. To guarantee this, we
+	 * use a spinlock.
+	 *
+	 * When a new frame is to be produced, the async producer will first acquire
+	 * the frameAssemblyRateLimiter spinlock. This way, when the next timeout is
+	 * fired it can check whether its predecessor stimframe has finished being
+	 * produced. If the preceding stimframe is still being produced, then we
+	 * re-arm the timer for CONFIG_STIMBUFF_FRAME_RETRY_DELAY_MS ms and try
+	 * again when it fires (nothing blocks or sleeps in this handler).
+	 */
+	int nextWakeupDelayMs;
+	bool deferred = false;
+	if (frameAssemblyRateLimiter.tryAcquire())
+	{
+		nextWakeupDelayMs = CONFIG_STIMBUFF_FRAME_PERIOD_MS;
+
+		// Check if we're ending a deferral period
+		if (nDeferrals > 0)
+		{
+			auto deferralEndTime = std::chrono::high_resolution_clock::now();
+			auto duration = deferralEndTime - deferralStartTime;
+			auto durationMs = std::chrono::duration_cast<
+				std::chrono::milliseconds>(duration);
+
+			std::cout << __func__ << ": Deferral period ended. "
+				<< "Total deferrals: " << nDeferrals
+				<< ", Duration: " << durationMs.count() << "ms" << std::endl;
+
+			nDeferrals = 0;
+		}
+
+		/**	EXPLANATION:
+		 * Call the derived class's frame production handler
+		 * Note: The derived class's frame production handler (aka
+		 * its implementation of stimFrameProductionTimesliceInd()) must
+		 * release the lock when frame production completes. Nothing in
+		 * this function releases frameAssemblyRateLimiter.
+		 */
+		stimFrameProductionTimesliceInd();
+	}
+	else
+	{
+		nextWakeupDelayMs = CONFIG_STIMBUFF_FRAME_RETRY_DELAY_MS;
+		deferred = true;
+
+		++nDeferrals;
+		// If this is first deferral, capture start stamp and print message
+		if (nDeferrals == 1)
+		{
+			deferralStartTime = std::chrono::high_resolution_clock::now();
+			std::cerr << __func__ << ": Deferral period beginning. "
+				"Configured deferral period: " << nextWakeupDelayMs << "ms"
+				<< std::endl;
+		}
+	}
+
+	scheduleNextTimeout(nextWakeupDelayMs);
+
+	// FIXME: We should be able to release the start/stop lock at this point.
+
+	if (deferred && OptionParser::getOptions().verbose)
+	{
+		std::cerr << __func__ << ": Deferring frame by " << nextWakeupDelayMs
+			<< "ms due to rate limit." << std::endl;
+	}
+}
+
+} // namespace stim_buff
+} // namespace smo
@@ -0,0 +1,29 @@
+option(ENABLE_LIB_livoxProto1 "Enable Livox Protocol v1 backend lib" ON) # Build switch for this backend; defaults to ON
+
+if(ENABLE_LIB_livoxProto1)
+	add_library(livoxProto1 SHARED
+		livoxProto1.cpp
+		core.cpp
+		device.cpp
+		protocol.cpp
+		broadcastListener.cpp
+		udpCommandDemuxer.cpp
+	)
+
+	# Set config define for header generation. NOTE(review): add_compile_definitions() is directory-scoped (not limited to livoxProto1) -- confirm that is intended.
+	add_compile_definitions(CONFIG_LIB_LIVOXPROTO1_ENABLED)
+	target_include_directories(livoxProto1 PUBLIC ${Boost_INCLUDE_DIRS})
+	target_link_libraries(livoxProto1 PUBLIC
+		Boost::system Boost::log
+		attachmentSupport)
+
+	# Verify Boost dynamic dependencies after build: runs cmake/VerifyBoostDynamic.cmake on the built library file
+	add_custom_command(TARGET livoxProto1 POST_BUILD
+		COMMAND ${CMAKE_COMMAND} -DVERIFY_FILE="$<TARGET_FILE:livoxProto1>"
+			-P ${CMAKE_SOURCE_DIR}/cmake/VerifyBoostDynamic.cmake
+		COMMENT "Verifying Boost dynamic dependencies for livoxProto1"
+	)
+
+	# Install rules: the library is installed under <prefix>/lib
+	install(TARGETS livoxProto1 DESTINATION lib)
+endif() # ENABLE_LIB_livoxProto1
@@ -0,0 +1,195 @@
+#include <algorithm>
+#include <iostream>
+#include <functional>
+#include <opts.h>
+#include <componentThread.h>
+#include "broadcastListener.h"
+#include "core.h"
+
+namespace livoxProto1 {
+namespace comms {
+
+BroadcastListener::BroadcastListener(
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	uint16_t listeningPort, uint16_t connectPort
+)
+: componentThread(componentThread),
+listeningPort(listeningPort),
+connectPort(connectPort),
+deviceGoneAwayCb(nullptr),
+socket(componentThread->getIoService()), // Dereferenced unchecked: componentThread must be non-null
+listeningEndpoint(boost::asio::ip::udp::v4(), listeningPort),
+isListening(false)
+{	/**	EXPLANATION:
+	 * Only records configuration and creates the (still closed) UDP socket
+	 * on the component thread's io_service. The socket is opened and bound
+	 * later, by start().
+	 */
+}
+
+/**	EXPLANATION:
+ * Returns the first discovered device whose identifier matches
+ * `deviceIdentifier` according to comms::deviceIdentifiersEqual(), or
+ * nullptr if no such device has been heard from.
+ */
+std::shared_ptr<DiscoveredDevice>
+BroadcastListener::getDevice(const std::string &deviceIdentifier) const
+{
+	for (const std::shared_ptr<DiscoveredDevice>& candidate
+		: discoveredDevices)
+	{
+		if (comms::deviceIdentifiersEqual(
+			candidate->deviceIdentifier, deviceIdentifier))
+		{
+			return candidate;
+		}
+	}
+
+	return nullptr;
+}
+
+/**	EXPLANATION:
+ * Completion handler for the async receive armed by startReceive().
+ * Validates the datagram now sitting in bcastMsgRecvBuffer (size, CRC32,
+ * header CRC16, sanity check), records the sender as a DiscoveredDevice if it
+ * is new, and then re-arms the receive.
+ *
+ * The receive is re-armed on EVERY path except cancellation: a single
+ * malformed or corrupt datagram must not permanently stop discovery.
+ * startReceive() is itself a no-op once isListening has been cleared.
+ *
+ * @param ec Result of the receive operation.
+ * @param bytes_received Number of bytes written into bcastMsgRecvBuffer.
+ */
+void BroadcastListener::broadcastMsgInd(
+	const boost::system::error_code& ec, std::size_t bytes_received)
+{
+	// Receive was cancelled (e.g. the socket was closed): do not re-arm.
+	if (ec == boost::asio::error::operation_aborted)
+		{ return; }
+
+	if (ec)
+	{
+		std::cerr << __func__ << ": Error receiving broadcast message: "
+			<< ec.message() << std::endl;
+		startReceive();
+		return;
+	}
+
+	if (bytes_received < sizeof(BroadcastMessage))
+	{
+		std::cerr << __func__
+			<< ": Received packet too small: " << bytes_received
+			<< " bytes (expected at least "
+			<< sizeof(BroadcastMessage) << ")" << std::endl;
+		startReceive();
+		return;
+	}
+
+	/* Use placement new to construct BroadcastMessage in the buffer.
+	 * NOTE(review): this relies on BroadcastMessage's default-initialization
+	 * leaving the received bytes untouched -- confirm it has no initializing
+	 * constructor or default member initializers.
+	 */
+	BroadcastMessage* msg = new (bcastMsgRecvBuffer) BroadcastMessage;
+
+	// Following the clean receiving flow:
+	// 1. Swap CRC32 to host endianness first
+	msg->footer.swapCrc32ToHostEndianness();
+	// 2. Validate CRC32 (on whole message excluding footer CRC32 field)
+	if (!msg->validateCrc32())
+	{
+		std::cerr << __func__
+			<< ": Broadcast message failed CRC32 validation" << std::endl;
+		startReceive();
+		return;
+	}
+
+	// 3. Swap CRC16 to host endianness
+	msg->header.swapCrc16ToHostEndianness();
+	// 4. Validate CRC16 (on header only)
+	if (!msg->header.validateCrc16())
+	{
+		std::cerr << __func__
+			<< ": Broadcast message failed CRC16 validation" << std::endl;
+		startReceive();
+		return;
+	}
+	// 5. Swap content to host endianness
+	msg->swapContentsToHostEndianness();
+	// 6. Validate message sanity
+	if (!msg->sanityCheck())
+	{
+		std::cerr << __func__
+			<< ": Broadcast message failed sanity check" << std::endl;
+		startReceive();
+		return;
+	}
+
+	/* Extract device information.
+	 * NOTE(review): broadcast_code is read as a NUL-terminated C string --
+	 * confirm sanityCheck() guarantees termination within the field.
+	 */
+	std::string senderIP = senderEndpoint.address().to_string();
+	std::string broadcastCode(
+		reinterpret_cast<const char*>(msg->broadcast_code));
+
+	// Held until return: covers the device list update and the re-arm.
+	smo::SpinLock::Guard lock(isListeningLock);
+
+	if (deviceExists(broadcastCode))
+	{
+		// Device already exists, just log the update
+		if (getProtoState().smoCallbacks.OptionParser_getOptions().verbose)
+		{
+			std::cout << __func__
+				<< ": Received broadcast from known device: "
+				<< broadcastCode << " at " << senderIP << "\n";
+		}
+	}
+	else
+	{
+		// Create new DiscoveredDevice using conversion constructor
+		auto device = std::make_shared<DiscoveredDevice>(*msg, senderIP);
+		discoveredDevices.push_back(device);
+		// Output device information using stringify
+		std::cout << __func__ << ": Discovered new Livox device: "
+			<< device->stringify() << "\n";
+	}
+
+	startReceive();
+}
+
+/**	EXPLANATION:
+ * Opens the UDP socket, binds it to the broadcast listening endpoint and
+ * arms the first async receive. Calling start() while already listening is
+ * a no-op.
+ *
+ * The isListening test is done under isListeningLock so it cannot race with
+ * the open/bind sequence. If open() or bind() fails, the socket is closed
+ * again so that a later start() can retry from a clean state.
+ *
+ * @throws boost::system::system_error (rethrown) if the socket cannot be
+ *	opened or bound.
+ */
+void BroadcastListener::start(void)
+{
+	try
+	{
+		/**		EXPLANATION:
+		 * Set up a boost::asio udp listening socket on the broadcast listening
+		 * port.
+		 *
+		 *		FIXME:
+		 * We should also set up a timer to check for devices that have gone
+		 * away.
+		 */
+		{
+			smo::SpinLock::Guard lock(isListeningLock);
+			if (isListening) { return; }
+
+			socket.open(boost::asio::ip::udp::v4());
+			socket.bind(listeningEndpoint);
+
+			isListening = true;
+		}
+
+		// Start the first async receive operation
+		startReceive();
+		std::cout << __func__ << ": BroadcastListener started on port "
+			<< listeningPort << std::endl;
+	}
+	catch (const boost::system::system_error& e)
+	{
+		{
+			// The guard in the try block was released during unwinding.
+			smo::SpinLock::Guard lock(isListeningLock);
+			isListening = false;
+		}
+
+		/* Undo a partial open(): without this a failed bind() would leave the
+		 * socket open and every later start() would fail on open(). Errors
+		 * from close() are deliberately ignored; the original failure is the
+		 * one that gets reported and rethrown.
+		 */
+		boost::system::error_code ignored;
+		socket.close(ignored);
+
+		std::cerr << __func__ << ": Failed to start BroadcastListener: "
+			<< e.what() << std::endl;
+		throw;
+	}
+}
+
+/**	EXPLANATION:
+ * Arms one async receive into bcastMsgRecvBuffer; the sender's address is
+ * written to senderEndpoint and completion is delivered to
+ * broadcastMsgInd(). Does nothing unless isListening is set.
+ */
+void BroadcastListener::startReceive(void)
+{
+	if (isListening)
+	{
+		auto recvBuffer = boost::asio::buffer(
+			bcastMsgRecvBuffer, sizeof(bcastMsgRecvBuffer));
+
+		socket.async_receive_from(
+			recvBuffer, senderEndpoint,
+			[this](const boost::system::error_code& ec, std::size_t nBytes)
+			{
+				broadcastMsgInd(ec, nBytes);
+			});
+	}
+}
+
+void BroadcastListener::stop(void)
+{	/**	EXPLANATION:
+	 * Clears isListening under isListeningLock (so startReceive() stops
+	 * re-arming), then closes the socket outside the lock. Calling stop()
+	 * when not listening is a no-op.
+	 *
+	 * A failure from socket.close() is logged and rethrown; isListening
+	 * has already been cleared by then.
+	 */
+	{
+		smo::SpinLock::Guard lock(isListeningLock);
+		if (!isListening) { return; }
+
+		isListening = false;
+	}
+
+	try
+	{
+		socket.close();
+		std::cout << __func__ << ": BroadcastListener stopped" << std::endl;
+	}
+	catch (const boost::system::system_error& e)
+	{
+		std::cerr << __func__ << ": Error stopping BroadcastListener: " << e.what()
+			<< std::endl;
+		throw;
+	}
+}
+
+} // namespace comms
+} // namespace livoxProto1
@@ -0,0 +1,80 @@
+#ifndef BROADCAST_LISTENER_H
+#define BROADCAST_LISTENER_H
+
+#include <boostAsioLinkageFix.h>
+#include <vector>
+#include <string>
+#include <memory>
+#include <atomic>
+#include <boost/asio/ip/udp.hpp>
+#include <user/senseApiDesc.h>
+#include <spinLock.h>
+#include "device.h"
+
+namespace livoxProto1 {
+namespace comms {
+
+/**		EXPLANATION:
+ * This class merely listens for UDP bcast dgrams on the designated listening
+ * port. It then builds a list of client device IP addrs that it has heard from.
+ * It doesn't connect to them or signal any events to the rest of the lib,
+ * except in the case that a device which the lib is using has gone away.
+ *
+ * Other than that, its role is to tell the lib which devices are available
+ * on the network.
+ */
+#define UDP_BCAST_MSG_BUFFER_NBYTES		(1024)
+
+class BroadcastListener
+{	/**	EXPLANATION:
+	 * Owns one UDP socket plus the list of devices heard from so far.
+	 * start() opens/binds the socket and arms an async receive; each
+	 * completed receive is delivered to broadcastMsgInd(), which appends
+	 * previously unseen senders to discoveredDevices.
+	 *
+	 * NOTE(review): the destructor is defaulted and the async handlers
+	 * capture `this` -- confirm stop() is always called, and pending
+	 * handlers drained, before an instance is destroyed.
+	 */
+public:
+	BroadcastListener(
+		const std::shared_ptr<smo::ComponentThread>& componentThread,
+		uint16_t listeningPort=55000, uint16_t connectPort=65000);
+
+	~BroadcastListener() = default;
+
+	typedef void (DeviceGoneAwayCbFn)(const DiscoveredDevice &device);
+	void setDeviceGoneAwayCb(DeviceGoneAwayCbFn *cb) // Stores the callback only; nothing in this class invokes it yet
+		{ deviceGoneAwayCb = cb; }
+
+	bool deviceExists(const std::string &deviceIdentifier) const // True if getDevice() finds a match
+		{ return getDevice(deviceIdentifier) != nullptr; }
+
+	std::shared_ptr<DiscoveredDevice>
+	getDevice(const std::string &deviceIdentifier) const; // nullptr when no discovered device matches
+
+	void start(void);
+	void stop(void);
+
+	void broadcastMsgInd( // Receive-completion handler; public, but driven by startReceive()
+		const boost::system::error_code& ec, std::size_t bytes_received);
+
+private:
+	void startReceive(void);
+
+private:
+	std::shared_ptr<smo::ComponentThread> componentThread;
+	/**		EXPLANATION:
+	 * The Livox proto says that client devices will spam broadcast UDP
+	 * dgrams to us on the listening port. We can then use the source IP from
+	 * the bcast dgram to figure out the client device's IP addr. Then we
+	 * should send a connect dgram to the connect port. This will tell the
+	 * client device our IP addr.
+	 */
+	uint16_t listeningPort, connectPort;
+	DeviceGoneAwayCbFn *deviceGoneAwayCb;
+	std::vector<std::shared_ptr<DiscoveredDevice>> discoveredDevices;
+
+	boost::asio::ip::udp::socket socket;
+	boost::asio::ip::udp::endpoint listeningEndpoint, senderEndpoint; // senderEndpoint is filled in by each receive
+	smo::SpinLock isListeningLock;
+	bool isListening;
+
+	uint8_t bcastMsgRecvBuffer[UDP_BCAST_MSG_BUFFER_NBYTES]; // Single receive buffer, reused for every datagram
+};
+
+} // namespace comms
+} // namespace livoxProto1
+
+#endif // BROADCAST_LISTENER_H
@@ -0,0 +1,276 @@
+#include <algorithm>
+#include <iostream>
+#include <functional>
+#include <optional>
+#include <opts.h>
+#include <asynchronousContinuation.h>
+#include <callback.h>
+#include <user/senseApiDesc.h>
+#include "protocol.h"
+#include "core.h"
+#include "device.h"
+#include "broadcastListener.h"
+#include "livoxProto1.h"
+
+
+namespace livoxProto1 {
+
+static ProtoState protoState =
+{	/* File-local singleton holding the library's global state. It is
+	 * populated by main(), torn down by exit(), and exposed to other
+	 * translation units only through getProtoState().
+	 */
+	.isInitialized = false,
+	.componentThread = nullptr,
+	.deviceManager = nullptr,
+	.smoCallbacks = {}
+};
+
+ProtoState& getProtoState()
+{	// Returns a mutable reference to the singleton above.
+	return protoState;
+}
+
+DeviceManager::DeviceManager()
+: broadcastListener(protoState.componentThread),
+  udpCommandDemuxer(protoState.componentThread, *this)
+{	/**	EXPLANATION:
+	 * Both members are built from protoState.componentThread, so that
+	 * field must already be set when a DeviceManager is constructed;
+	 * main() assigns it before calling make_unique<DeviceManager>().
+	 */
+	broadcastListener.setDeviceGoneAwayCb(deviceGoneAwayInd);
+}
+
+/**	EXPLANATION:
+ * Static "device gone away" callback registered with the BroadcastListener.
+ * Logs the event and removes the first Device whose discoveredDevice compares
+ * equal to `device` from the global DeviceManager's collection, if present.
+ *
+ * Being static, it reaches the manager through the global protoState, which
+ * holds a null deviceManager before main() and after exit(); that case is a
+ * no-op rather than a null dereference.
+ *
+ * @param device The discovered device that has disappeared.
+ */
+void DeviceManager::deviceGoneAwayInd(const comms::DiscoveredDevice &device)
+{
+	std::cout << "Device gone away: " << device.stringify() << std::endl;
+
+	DeviceManager *mgr = protoState.deviceManager.get();
+	if (mgr == nullptr) {
+		return;
+	}
+
+	/* A single scan both answers "is it ours?" and locates the entry.
+	 * DiscoveredDevice::operator== compares deviceIdentifiers exactly as
+	 * getDevice() does, so no separate existence check is needed.
+	 */
+	auto it = std::find_if(
+		mgr->devices.begin(),
+		mgr->devices.end(),
+		[&device](const std::shared_ptr<Device> &d) {
+			return d->discoveredDevice == device;
+		}
+	);
+	if (it != mgr->devices.end()) {
+		mgr->devices.erase(it);
+	}
+}
+
+/**	EXPLANATION:
+ * Finds the first managed Device whose discovered identifier matches
+ * `deviceIdentifier` per comms::deviceIdentifiersEqual().
+ *
+ * @return The device, or std::nullopt when none matches.
+ */
+std::optional<std::shared_ptr<Device>> DeviceManager::getDevice(
+	const std::string &deviceIdentifier
+	)
+{
+	const auto match = std::find_if(
+		devices.begin(), devices.end(),
+		[&deviceIdentifier](const std::shared_ptr<Device>& candidate) {
+			return comms::deviceIdentifiersEqual(
+				candidate->discoveredDevice.deviceIdentifier,
+				deviceIdentifier);
+		});
+
+	if (match == devices.end())
+		{ return std::nullopt; }
+
+	return *match;
+}
+
+// GetOrCreateDeviceReq nested class implementation
+class DeviceManager::GetOrCreateDeviceReq
+:	public smo::NonPostedAsynchronousContinuation<
+		livoxProto1_getOrCreateDeviceReqCbFn>
+{	/**	EXPLANATION:
+	 * Continuation object for DeviceManager::getOrCreateDeviceReq(). It
+	 * carries the not-yet-registered Device and the caller's callback
+	 * across the asynchronous connectReq() call.
+	 */
+public:
+	DeviceManager& deviceManager;
+	// The device we're trying to connect (holds all connection parameters)
+	std::shared_ptr<Device> pendingDevice;
+
+public:
+	GetOrCreateDeviceReq(
+		DeviceManager& mgr,
+		std::shared_ptr<Device> device,
+		smo::Callback<livoxProto1_getOrCreateDeviceReqCbFn> cb)
+	:	smo::NonPostedAsynchronousContinuation<
+			livoxProto1_getOrCreateDeviceReqCbFn>(std::move(cb)),
+	deviceManager(mgr), pendingDevice(device)
+	{}
+
+	// Public accessor for the original callback
+	void callOriginalCallback(bool success, std::shared_ptr<Device> device)
+		{ callOriginalCb(success, device); }
+
+	void callOriginalCallbackWithFailure() // Reports (false, nullptr) to the original caller
+		{ callOriginalCallback(false, nullptr); }
+
+	void getOrCreateDeviceReq1(
+		std::shared_ptr<GetOrCreateDeviceReq> context, bool connectSuccess
+		)
+	{	/**	EXPLANATION:
+		 * Completion step for connectReq(). getOrCreateDeviceReq() binds
+		 * this with the request itself as `context`, so `context` and
+		 * `this` refer to the same object. The device is added to the
+		 * manager's collection only when the connection succeeded.
+		 */
+		if (!connectSuccess)
+		{
+			std::cerr << __func__ << ": Connection failed for device "
+				<< context->pendingDevice->discoveredDevice.deviceIdentifier
+				<< std::endl;
+			context->callOriginalCallbackWithFailure();
+			return;
+		}
+
+		// Connection successful, add device to collection
+		context->deviceManager.devices.push_back(context->pendingDevice);
+		if (getProtoState().smoCallbacks.OptionParser_getOptions().verbose)
+		{
+			std::cout << __func__ << ": Successfully connected and added device "
+				<< context->pendingDevice->discoveredDevice.deviceIdentifier
+				<< std::endl;
+		}
+
+		// Return success with the connected device
+		context->callOriginalCallback(true, context->pendingDevice);
+	}
+};
+
+void DeviceManager::getOrCreateDeviceReq(
+	const std::string &deviceIdentifier,
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	int commandTimeoutMs, int retryDelayMs,
+	const std::string& smoIp, uint8_t smoSubnetNbits,
+	uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort,
+	smo::Callback<livoxProto1_getOrCreateDeviceReqCbFn> callback)
+{	/**	EXPLANATION:
+	 * Returns the already-managed Device for `deviceIdentifier` through
+	 * `callback` if there is one; otherwise builds a new Device and
+	 * starts connectReq() on it. The new device joins `devices` only if
+	 * that connection succeeds (see GetOrCreateDeviceReq).
+	 *
+	 * Throws std::invalid_argument for a non-empty smoIp that is not
+	 * valid IPv4, or for smoSubnetNbits > 32. An empty smoIp is accepted.
+	 *
+	 * NOTE(review): nothing here records that a connection is in flight,
+	 * so two overlapping requests for the same identifier look like they
+	 * could each create and add a Device -- confirm callers serialize.
+	 */
+	// Validate smoIp format using Boost.Asio IPv4 validation
+	if (!smoIp.empty() && !comms::isValidIPv4(smoIp))
+	{
+		throw std::invalid_argument(
+			std::string(__func__) +
+			": Invalid IPv4 smoIp format: " + smoIp);
+	}
+
+	// Validate subnet nbits
+	if (smoSubnetNbits > 32)
+	{
+		throw std::invalid_argument(
+			std::string(__func__) +
+			": smoSubnetNbits must be between 0 and 32, got: " +
+			std::to_string(smoSubnetNbits));
+	}
+
+	// First try to get existing device
+	auto existingDevice = getDevice(deviceIdentifier);
+	if (existingDevice)
+	{
+		// Device already exists and is connected, return it
+		callback.callbackFn(true, existingDevice.value());
+		return;
+	}
+
+	// Device doesn't exist, create a new one but don't add it to collection yet
+	auto newDevice = std::make_shared<Device>(
+		deviceIdentifier, componentThread,
+		commandTimeoutMs, retryDelayMs,
+		smoIp, smoSubnetNbits,
+		dataPort, cmdPort, imuPort);
+
+	// Create the continuation request object to hold state and callbacks
+	auto request = std::make_shared<GetOrCreateDeviceReq>(
+		*this, newDevice, std::move(callback));
+
+	// Start the connection process - only add to collection on success
+	request->pendingDevice->connectReq(
+		{request, std::bind(
+			&DeviceManager::GetOrCreateDeviceReq::getOrCreateDeviceReq1,
+			request.get(), request, std::placeholders::_1)});
+}
+
+class DeviceManager::DestroyDeviceReq
+:	public smo::NonPostedAsynchronousContinuation<
+		livoxProto1_destroyDeviceReqCbFn>
+{	/**	EXPLANATION:
+	 * Continuation object for DeviceManager::destroyDeviceReq(). It
+	 * carries the Device being torn down and the caller's callback across
+	 * the asynchronous disconnectReq() call.
+	 */
+public:
+	DeviceManager& deviceManager;
+	std::shared_ptr<Device> pendingDevice;
+
+public:
+	DestroyDeviceReq(
+		DeviceManager& mgr,
+		std::shared_ptr<Device> device,
+		smo::Callback<livoxProto1_destroyDeviceReqCbFn> cb)
+	:	smo::NonPostedAsynchronousContinuation<
+			livoxProto1_destroyDeviceReqCbFn>(std::move(cb)),
+	deviceManager(mgr), pendingDevice(device)
+	{}
+
+	// Public accessor for the original callback
+	void callOriginalCallback(bool success)
+		{ callOriginalCb(success); }
+
+	void callOriginalCallbackWithFailure()
+		{ callOriginalCallback(false); }
+
+	void destroyDeviceReq1(
+		std::shared_ptr<DestroyDeviceReq> context, bool success
+		)
+	{	/**	EXPLANATION:
+		 * Completion step for disconnectReq(). The device is removed
+		 * from the manager's collection whether or not the disconnect
+		 * succeeded; `success` is only forwarded to the original caller.
+		 */
+		context->deviceManager.devices.erase(
+			std::remove(
+				context->deviceManager.devices.begin(),
+				context->deviceManager.devices.end(),
+				context->pendingDevice),
+			context->deviceManager.devices.end());
+
+		context->callOriginalCallback(success);
+	}
+};
+
+/**	EXPLANATION:
+ * Asynchronously disconnects `dev` and removes it from the collection.
+ *
+ * The callback is invoked immediately with `false`, and nothing else is done,
+ * when `dev` is null, is not managed by this DeviceManager, or still has
+ * stimulus producers attached. Otherwise disconnectReq() is started and the
+ * callback is invoked from DestroyDeviceReq::destroyDeviceReq1().
+ *
+ * @param dev Device to destroy; may be null.
+ * @param callback Receives the outcome.
+ */
+void DeviceManager::destroyDeviceReq(
+	std::shared_ptr<Device> dev,
+	smo::Callback<livoxProto1_destroyDeviceReqCbFn> callback
+)
+{
+	// A null handle cannot name a managed device; don't dereference it.
+	if (!dev)
+	{
+		callback.callbackFn(false);
+		return;
+	}
+
+	/**		EXPLANATION:
+	 * Check to see if the device is in our collection. If so, call
+	 * disconnectReq and then remove it.
+	 */
+	std::shared_ptr<Device> device = getDevice(dev->discoveredDevice).
+		value_or(nullptr);
+
+	if (!device || device->nAttachedStimulusProducers > 0)
+	{
+		callback.callbackFn(false);
+		return;
+	}
+
+	auto request = std::make_shared<DestroyDeviceReq>(
+		*this, device, std::move(callback));
+
+	device->disconnectReq(
+		{request, std::bind(
+			&DeviceManager::DestroyDeviceReq::destroyDeviceReq1,
+			request.get(), request, std::placeholders::_1)});
+}
+
+/**	EXPLANATION:
+ * One-time library initialization: records the component thread and SMO
+ * callbacks in the global state, creates the DeviceManager and starts its
+ * broadcast listener and UDP command demuxer. A second call while already
+ * initialized is a no-op.
+ *
+ * If construction or either start() throws, the global state is rolled back
+ * (DeviceManager destroyed, isInitialized cleared) before the exception is
+ * rethrown. Otherwise the library would be left flagged as initialized, every
+ * later main() would return early, and initialization could never be retried.
+ *
+ * @param componentThread Thread whose io_service the library runs on.
+ * @param smoCallbacks Callbacks into the host application.
+ */
+void main(const std::shared_ptr<smo::ComponentThread> &componentThread,
+	const smo::stim_buff::SmoCallbacks& smoCallbacks)
+{
+	if (protoState.isInitialized) {
+		return;
+	}
+
+	protoState.isInitialized = true;
+	protoState.componentThread = componentThread;
+	protoState.smoCallbacks = smoCallbacks;
+
+	try
+	{
+		protoState.deviceManager = std::make_unique<DeviceManager>();
+		protoState.deviceManager->broadcastListener.start();
+		protoState.deviceManager->udpCommandDemuxer.start();
+	}
+	catch (...)
+	{
+		// Destroying the manager tears down whatever had been started.
+		protoState.deviceManager.reset();
+		protoState.componentThread.reset();
+		protoState.isInitialized = false;
+		throw;
+	}
+}
+
+void exit(void)
+{	/**	EXPLANATION:
+	 * Reverses main(): stops the UDP command demuxer and the broadcast
+	 * listener, destroys the DeviceManager, drops the component thread
+	 * reference and clears isInitialized. No-op if not initialized.
+	 * smoCallbacks is left as it was.
+	 *
+	 * NOTE(review): BroadcastListener::stop() can rethrow; if it does,
+	 * the remaining teardown steps below are skipped and isInitialized
+	 * stays true -- confirm that is acceptable.
+	 */
+	if (!protoState.isInitialized) {
+		return;
+	}
+
+	protoState.deviceManager->udpCommandDemuxer.stop();
+	protoState.deviceManager->broadcastListener.stop();
+	protoState.deviceManager.reset();
+	protoState.componentThread.reset();
+	protoState.isInitialized = false;
+}
+
+} // namespace livoxProto1
@@ -0,0 +1,80 @@
+#ifndef LIVOXPROTO1_CORE_H
+#define LIVOXPROTO1_CORE_H
+
+#include <vector>
+#include <string>
+#include <memory>
+#include <cstdint>
+#include <optional>
+#include <user/senseApiDesc.h>
+#include "device.h"
+#include "broadcastListener.h"
+#include "udpCommandDemuxer.h"
+#include "livoxProto1.h"
+#include <callback.h>
+
+namespace livoxProto1 {
+
+class DeviceManager
+{	/**	EXPLANATION:
+	 * Owns the set of connected Devices together with the two network
+	 * front-ends (broadcast listener and UDP command demuxer). Devices
+	 * are added by getOrCreateDeviceReq() once their connection succeeds
+	 * and removed by destroyDeviceReq() or deviceGoneAwayInd().
+	 */
+public:
+	DeviceManager();
+	~DeviceManager() = default;
+
+	static void deviceGoneAwayInd(const comms::DiscoveredDevice &device); // Static: operates on the global ProtoState's manager
+
+	void getOrCreateDeviceReq(
+		const std::string &deviceIdentifier,
+		const std::shared_ptr<smo::ComponentThread>& componentThread,
+		int commandTimeoutMs, int retryDelayMs,
+		const std::string& smoIp, uint8_t smoSubnetNbits,
+		uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort,
+		smo::Callback<livoxProto1_getOrCreateDeviceReqCbFn> callback);
+
+	void destroyDeviceReq(
+		std::shared_ptr<Device> device,
+		smo::Callback<livoxProto1_destroyDeviceReqCbFn> callback);
+
+	std::optional<std::shared_ptr<Device>> getDevice( // std::nullopt when no managed device matches
+		const std::string &deviceIdentifier);
+
+	std::optional<std::shared_ptr<Device>> getDevice( // Convenience overload: looks up by device.deviceIdentifier
+		const comms::DiscoveredDevice &device)
+	{
+		return getDevice(device.deviceIdentifier);
+	}
+
+private:
+	// Configuration
+	static constexpr int RETRY_DELAY_SECONDS = 3; // <N> seconds delay; NOTE(review): not referenced in the code visible here
+
+public:
+	std::vector<std::shared_ptr<Device>> devices;
+	comms::BroadcastListener broadcastListener;
+	comms::UdpCommandDemuxer udpCommandDemuxer;
+
+	// Nested continuation classes for async device creation and destruction
+	class GetOrCreateDeviceReq;
+	class DestroyDeviceReq;
+};
+
+void main( // One-time library init: creates the DeviceManager and starts its listener and demuxer
+	const std::shared_ptr<smo::ComponentThread> &componentThread,
+	const smo::stim_buff::SmoCallbacks& smoCallbacks);
+void exit(void); // Stops both, destroys the DeviceManager; no-op when not initialized
+
+// Global state structure
+struct ProtoState
+{	/* Library-wide state; a single instance lives in core.cpp and is
+	 * reached through getProtoState().
+	 */
+	bool isInitialized = false; // Set by main(), cleared by exit()
+	std::shared_ptr<smo::ComponentThread> componentThread; // Thread passed to main(); used to build the DeviceManager's members
+	std::unique_ptr<DeviceManager> deviceManager; // Null before main() and after exit()
+	smo::stim_buff::SmoCallbacks smoCallbacks; // Callbacks into the host (e.g. OptionParser_getOptions)
+};
+
+// Access to global state for extern "C" functions
+ProtoState& getProtoState();
+
+} // namespace livoxProto1
+
+#endif // LIVOXPROTO1_CORE_H
@@ -0,0 +1,270 @@
+#ifndef LIVOX_PROTO1_DEVICE_H
+#define LIVOX_PROTO1_DEVICE_H
+
+#include <boostAsioLinkageFix.h>
+#include <string>
+#include <cstdint>
+#include <cstddef>
+#include <memory>
+#include <atomic>
+#include <optional>
+#include <functional>
+#include <unordered_map>
+#include <stdexcept>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <boost/asio/deadline_timer.hpp>
+#include <boost/asio/posix/stream_descriptor.hpp>
+#include "protocol.h"
+#include <callback.h>
+#include <spinLock.h>
+
+// Custom hash function for std::pair<uint8_t, uint8_t>
+namespace std {
+	/* Hash for a pair of bytes (used as the key type of
+	 * Device::udpCommandHandlers). Packs both bytes into a single 16-bit
+	 * value, `first` in the high byte, so distinct pairs never collide.
+	 *
+	 * NOTE(review): the C++ standard only permits specializing std
+	 * templates for program-defined types; a custom hasher passed to
+	 * unordered_map would avoid relying on this specialization.
+	 */
+	template<>
+	struct hash<std::pair<uint8_t, uint8_t>> {
+		size_t operator()(const std::pair<uint8_t, uint8_t>& key) const noexcept {
+			const size_t highByte = key.first;
+			const size_t lowByte = key.second;
+			return (highByte << 8) | lowByte;
+		}
+	};
+}
+
+// Forward declaration
+namespace smo {
+	class ComponentThread;
+}
+
+namespace livoxProto1 {
+namespace comms {
+
+/**		EXPLANATION:
+ * This class represents a discovered device. It is used to store the
+ * device identifier and IP address of a discovered device.
+ */
+class DiscoveredDevice
+{	/* Plain value type: identifier, device type and IP address of one
+	 * device heard from on the network. Equality considers the
+	 * identifier only.
+	 */
+public:
+	DiscoveredDevice(
+		const std::string &deviceIdentifier,
+		DeviceType deviceType,
+		const std::string &ipAddr);
+
+	// "Conversion" constructor from BroadcastMessage
+	DiscoveredDevice(const BroadcastMessage &msg, const std::string &ipAddr);
+
+	~DiscoveredDevice() = default;
+
+	bool operator==(const DiscoveredDevice &other) const // Compares deviceIdentifier only; deviceType and ipAddr are ignored
+	{
+		return comms::deviceIdentifiersEqual(
+			deviceIdentifier, other.deviceIdentifier);
+	}
+
+	std::string stringify(void) const;
+	std::string getDeviceTypeName(void) const;
+
+public:
+	std::string deviceIdentifier;
+	DeviceType deviceType;
+	std::string ipAddr;
+};
+
+} // namespace comms
+
+class Device
+{	/**	EXPLANATION:
+	 * One Livox device as seen by this library: its discovery record,
+	 * connection configuration, heartbeat and point-cloud state, and the
+	 * asynchronous request API (connect, handshake, disconnect, pcloud
+	 * enable/disable, return mode get/set). Every *Req() method reports
+	 * its result through the smo::Callback it is given.
+	 */
+public:
+	Device(const std::string &deviceIdentifier,
+		const std::shared_ptr<smo::ComponentThread>& componentThread,
+		int commandTimeoutMs, int retryDelayMs,
+		const std::string& smoIp, uint8_t smoSubnetNbits,
+		uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort);
+	~Device();
+
+private:
+	// Heartbeat mechanism
+	void startHeartbeat();
+	void stopHeartbeat();
+	void sendHeartbeat();
+	void onHeartbeatTimer(const boost::system::error_code& error);
+	std::string generateClientDeviceIpFromSerialNumber(
+		const std::string& broadcastCode);
+
+	// IP detection methods
+	std::optional<std::string> detectSmoIp(const std::string& deviceIP);
+	uint32_t getSubnetMaskFor(uint8_t nbits);
+
+	// Continuation classes, one per asynchronous request; defined out of line
+	class ConnectReq;
+	class ConnectToKnownDeviceReq;
+	class ConnectByDeviceIdentifierReq;
+	class ExecuteHandshakeReq;
+	class DisconnectReq;
+	class EnablePcloudDataReq;
+	class DisablePcloudDataReq;
+	class SetReturnModeReq;
+	class GetReturnModeReq;
+
+public:
+	enum class ReturnMode : uint8_t
+	{
+		SingleFirst		= 0x00,
+		SingleStrongest	= 0x01,
+		Dual			= 0x02,
+		Triple			= 0x03
+	};
+
+	/**
+	 * Get the number of points per datagram based on return mode
+	 * @param returnMode The return mode (0=SingleFirst, 1=SingleStrongest, 2=Dual, 3=Triple)
+	 * @return Number of points per datagram
+	 * @throws std::runtime_error if returnMode is not one of the four
+	 *	ReturnMode values
+	 */
+	static inline size_t getNPointsPerDgram(int returnMode)
+	{
+		/*
+		 * Map modes to points per datagram based on Livox docs. The
+		 * numbers below are the ReturnMode enum values (0..3):
+		 * 0: first, 1: strongest -> 96 samples => 96 points
+		 * 2: dual -> 48 samples * 2 points = 96
+		 * 3: triple -> 30 samples * 3 points = 90
+		 */
+		switch (returnMode)
+		{
+		case static_cast<int>(ReturnMode::SingleFirst):
+		case static_cast<int>(ReturnMode::SingleStrongest):
+		case static_cast<int>(ReturnMode::Dual):
+			return 96u;
+		case static_cast<int>(ReturnMode::Triple):
+			return 90u;
+		default:
+			throw std::runtime_error(
+				std::string(__func__) + ": Unknown returnMode "
+				+ std::to_string(returnMode));
+		}
+	}
+
+	// Utility methods
+	std::optional<std::string> getSmoIp(const std::string& deviceIP);
+
+	// Callback function type definitions for async methods
+	typedef std::function<void(bool success)> connectReqCbFn;
+	typedef std::function<
+		void(bool success, const std::string& ipAddr)>
+		connectToKnownDeviceReqCbFn;
+	typedef std::function<
+		void(bool success, const std::string& ipAddr)>
+		connectByDeviceIdentifierReqCbFn;
+	typedef std::function<void(bool success)> executeHandshakeReqCbFn;
+	typedef std::function<void(bool success)> disconnectReqCbFn;
+	typedef std::function<void(bool success)> enablePcloudDataReqCbFn;
+	typedef std::function<void(bool success)> disablePcloudDataReqCbFn;
+	typedef std::function<void(bool success)> setReturnModeReqCbFn;
+	typedef std::function<void(bool success, uint8_t returnMode)>
+		getReturnModeReqCbFn;
+
+	// Async connection methods
+	void connectReq(smo::Callback<connectReqCbFn> callback);
+	void connectToKnownDeviceReq(
+		smo::Callback<connectToKnownDeviceReqCbFn> callback);
+	void connectByDeviceIdentifierReq(
+		smo::Callback<connectByDeviceIdentifierReqCbFn> callback);
+	void executeHandshakeReq(
+		const std::string& deviceIP,
+		smo::Callback<executeHandshakeReqCbFn> callback);
+	void disconnectReq(smo::Callback<disconnectReqCbFn> callback);
+	void enablePcloudDataReq(smo::Callback<enablePcloudDataReqCbFn> callback);
+	void disablePcloudDataReq(smo::Callback<disablePcloudDataReqCbFn> callback);
+	void setReturnModeReq(
+		uint8_t returnMode, smo::Callback<setReturnModeReqCbFn> callback);
+	void getReturnModeReq(smo::Callback<getReturnModeReqCbFn> callback);
+
+public:
+	comms::DiscoveredDevice discoveredDevice;
+	std::atomic<size_t> nAttachedStimulusProducers; // DeviceManager::destroyDeviceReq() refuses to destroy the device while this is > 0
+
+	// Configuration
+	std::shared_ptr<smo::ComponentThread> componentThread;
+	int commandTimeoutMs, retryDelayMs;
+	std::string smoIp;
+	std::string detectedSmoListeningIp;
+	uint8_t smoSubnetNbits;
+	uint16_t dataPort, cmdPort, imuPort;
+
+	// Heartbeat state
+	std::unique_ptr<boost::asio::deadline_timer> heartbeatTimer;
+	std::atomic<bool> heartbeatActive;
+	smo::SpinLock heartbeatActiveLock;
+
+	// Point cloud data state
+	std::atomic<bool> pcloudDataActive;
+
+	// Cached last-known return mode for this device
+	ReturnMode currentReturnMode = ReturnMode::SingleFirst;
+
+public:
+	// UDP datagram handling
+	void handleUdpDgram(
+		const uint8_t* data, ssize_t bytesReceived,
+		const struct sockaddr_in& senderAddr);
+
+	// Command handler registration
+	void registerUdpCommandHandler(
+		uint8_t cmd_set, uint8_t cmd_id,
+		std::function<void(
+			const uint8_t* data, ssize_t bytesReceived,
+			const struct sockaddr_in& senderAddr)> handler,
+		const std::string& deviceIP = "");
+
+	void unregisterUdpCommandHandler(
+		uint8_t cmd_set, uint8_t cmd_id, const std::string& deviceIP = "");
+
+private:
+	// Point cloud data setup
+	void cleanupPcloudDataSocket();
+
+	/**		EXPLANATION:
+	 * This is the "straightforward" map of command set and command id to
+	 * handlers. This is useful for any commands which are guaranteed to be
+	 * issued to the device *AFTER* the device has successfully been added
+	 * to the DeviceManager's list of devices.
+	 *
+	 * I.e: it cannot be used for commands which are issued to the device before
+	 * getOrCreateDevice() has added the device to the DeviceManager's list of
+	 * devices.
+	 */
+	// Command handler map, keyed by (cmd_set, cmd_id)
+	std::unordered_map<
+		std::pair<uint8_t, uint8_t>,
+		std::function<void(
+			const uint8_t* data, ssize_t bytesReceived,
+			const struct sockaddr_in& senderAddr)>> udpCommandHandlers;
+
+public:
+	/**		EXPLANATION:
+	 * This is the "temporary" map of command set and command id to
+	 * handlers. This is useful for any commands which are issued to the device
+	 * while it is being constructed.
+	 *
+	 * I.e: it shouldn't be used for cmds which are issued to the device after
+	 * getOrCreateDevice() has added the device to the DeviceManager's list of
+	 * devices. It will work for such commands, but we'd kind of prefer to use
+	 * the "straightforward" map above for such commands.
+	 *
+	 *	NOTE:
+	 * There's a strong argument to be made for just getting rid of the
+	 * "straightforward" map above and just using this one, tho.
+	 *
+	 * The map is static, i.e. shared by all Device instances, and keyed by
+	 * a string (presumably the device IP -- confirm against the .cpp).
+	 */
+	struct CommandHandler {
+		uint8_t cmd_set;
+		uint8_t cmd_id;
+		std::function<void(
+			const uint8_t* data, ssize_t bytesReceived,
+			const struct sockaddr_in& senderAddr)> handler;
+	};
+	static std::unordered_map<std::string, std::vector<CommandHandler>>
+		devicesUnderConstruction;
+};
+
+} // namespace livoxProto1
+
+#endif // LIVOX_PROTO1_DEVICE_H
@@ -0,0 +1,123 @@
+#include <boostAsioLinkageFix.h>
+#include <stdexcept>
+#include <callback.h>
+#include <boost/asio/posix/stream_descriptor.hpp>
+#include "livoxProto1.h"
+#include "device.h"
+#include "core.h"
+#include "udpCommandDemuxer.h"
+
+
+extern "C" {
+
+/**
+ * C-linkage entry point that forwards a get-or-create request to the
+ * DeviceManager held in the protocol state.
+ *
+ * Every argument is passed through unchanged to
+ * DeviceManager::getOrCreateDeviceReq().
+ *
+ * Throws std::runtime_error when the protocol state holds no DeviceManager.
+ */
+void livoxProto1_getOrCreateDeviceReq(
+	const std::string& deviceIdentifier,
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	int commandTimeoutMs, int retryDelayMs,
+	const std::string& smoIp, uint8_t smoSubnetNbits,
+	uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort,
+	smo::Callback<livoxProto1_getOrCreateDeviceReqCbFn> callback
+)
+{
+	auto& state = livoxProto1::getProtoState();
+
+	// Normal path: a DeviceManager exists, so hand the request over.
+	if (state.deviceManager)
+	{
+		state.deviceManager->getOrCreateDeviceReq(
+			deviceIdentifier, componentThread,
+			commandTimeoutMs, retryDelayMs,
+			smoIp, smoSubnetNbits,
+			dataPort, cmdPort, imuPort,
+			callback);
+		return;
+	}
+
+	// No DeviceManager: the library has not been initialized.
+	throw std::runtime_error(
+		std::string(__func__) + ": LivoxProto1 not initialized - call "
+		"livoxProto1_main first");
+}
+
+/**
+ * C-linkage entry point that forwards a destroy request for `device` to the
+ * DeviceManager held in the protocol state.
+ *
+ * Throws std::runtime_error when the protocol state holds no DeviceManager.
+ * NOTE(review): `device` is not null-checked here, unlike the
+ * livoxProto1_device_* entry points -- confirm the DeviceManager handles a
+ * null pointer.
+ */
+void livoxProto1_destroyDeviceReq(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_destroyDeviceReqCbFn> callback
+)
+{
+	auto& state = livoxProto1::getProtoState();
+
+	if (state.deviceManager)
+	{
+		state.deviceManager->destroyDeviceReq(device, callback);
+		return;
+	}
+
+	throw std::runtime_error(std::string(__func__)
+		+ ": DeviceManager not initialized");
+}
+
+// C-linkage wrapper: passes both arguments straight to livoxProto1::main().
+void livoxProto1_main(
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	const smo::stim_buff::SmoCallbacks& smoCallbacks)
+{
+	using livoxProto1::main;
+	main(componentThread, smoCallbacks);
+}
+
+// C-linkage wrapper: calls livoxProto1::exit() and nothing else.
+void livoxProto1_exit(void)
+{
+	using livoxProto1::exit;
+	exit();
+}
+
+/**
+ * C-linkage entry point that forwards to Device::enablePcloudDataReq().
+ *
+ * Throws std::runtime_error when `device` is null.
+ */
+void livoxProto1_device_enablePcloudDataReq(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_enablePcloudDataReqCbFn> callback
+)
+{
+	if (device)
+	{
+		device->enablePcloudDataReq(callback);
+		return;
+	}
+
+	throw std::runtime_error(std::string(__func__)
+		+ ": Device pointer is null");
+}
+
+/**
+ * C-linkage entry point that forwards to Device::disablePcloudDataReq().
+ *
+ * Throws std::runtime_error when `device` is null.
+ */
+void livoxProto1_device_disablePcloudDataReq(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_disablePcloudDataReqCbFn> callback
+)
+{
+	if (device)
+	{
+		device->disablePcloudDataReq(callback);
+		return;
+	}
+
+	throw std::runtime_error(std::string(__func__)
+		+ ": Device pointer is null");
+}
+
+/**
+ * C-linkage entry point that forwards to Device::getReturnModeReq().
+ *
+ * Throws std::runtime_error when `device` is null.
+ */
+void livoxProto1_device_getReturnModeReq(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_getReturnModeReqCbFn> callback
+)
+{
+	if (device)
+	{
+		device->getReturnModeReq(callback);
+		return;
+	}
+
+	throw std::runtime_error(std::string(__func__)
+		+ ": Device pointer is null");
+}
+
+/**
+ * C-linkage entry point returning whatever the DeviceManager's
+ * udpCommandDemuxer reports from getPcloudDataFdDesc().
+ *
+ * Throws std::runtime_error when the protocol state holds no DeviceManager.
+ */
+std::shared_ptr<boost::asio::posix::stream_descriptor>
+livoxProto1_getPcloudDataFdDesc(void)
+{
+	auto& state = livoxProto1::getProtoState();
+
+	if (state.deviceManager)
+	{
+		return state.deviceManager->udpCommandDemuxer.getPcloudDataFdDesc();
+	}
+
+	throw std::runtime_error(std::string(__func__)
+		+ ": DeviceManager not initialized");
+}
+
+} // extern "C"
@@ -0,0 +1,107 @@
+#ifndef LIVOXPROTO1_H
+#define LIVOXPROTO1_H
+
+#include <boostAsioLinkageFix.h>
+#include <memory>
+#include <string>
+#include <cstdint>
+#include <functional>
+#include <callback.h>
+#include <boost/asio/posix/stream_descriptor.hpp>
+
+// Forward declarations
+namespace smo {
+namespace stim_buff {
+	struct SmoCallbacks;
+}
+	class ComponentThread;
+}
+
+namespace livoxProto1 {
+	class Device;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize the Livox protocol library
+ * @param componentThread Component thread shared pointer
+ * @param smoCallbacks Callbacks provided by SMO
+ */
+typedef void livoxProto1_mainFn(
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	const smo::stim_buff::SmoCallbacks& smoCallbacks);
+
+/**
+ * Cleanup the Livox protocol library
+ */
+typedef void livoxProto1_exitFn(void);
+
+/**
+ * Get or create a Livox device connection (asynchronous request).
+ *
+ * The request function returns void; its outcome is delivered through
+ * `callback`. The "default" values listed below are not encoded in the
+ * function type, so callers must pass every argument explicitly.
+ *
+ * @param deviceIdentifier The device identifier (broadcast code)
+ * @param componentThread Component thread for async operations
+ * @param commandTimeoutMs Command timeout in milliseconds (default: 1000)
+ * @param retryDelayMs Retry delay in milliseconds (default: 3000)
+ * @param smoIp SMO IP address (empty string for auto-detection)
+ * @param smoSubnetNbits SMO subnet mask bits (e.g., 24 for /24, 16 for /16)
+ * @param dataPort Data port for point cloud (default: 56000)
+ * @param cmdPort Command port (default: 56001)
+ * @param imuPort IMU port (default: 56002)
+ * @param callback Invoked with (success, device): the Device pointer on
+ *                 success, nullptr on failure
+ */
+// Completion-callback signature for livoxProto1_getOrCreateDeviceReq:
+// receives a success flag and a shared pointer to the device.
+typedef std::function<
+	void(bool success, std::shared_ptr<livoxProto1::Device> device)>
+	livoxProto1_getOrCreateDeviceReqCbFn;
+
+// Function type of the livoxProto1_getOrCreateDeviceReq entry point.
+typedef void livoxProto1_getOrCreateDeviceReqFn(
+	const std::string& deviceIdentifier,
+	const std::shared_ptr<smo::ComponentThread>& componentThread,
+	int commandTimeoutMs, int retryDelayMs,
+	const std::string& smoIp, uint8_t smoSubnetNbits,
+	uint16_t dataPort,  uint16_t cmdPort, uint16_t imuPort,
+	smo::Callback<livoxProto1_getOrCreateDeviceReqCbFn> callback);
+
+// Completion-callback signature and function type of
+// livoxProto1_destroyDeviceReq; the callback receives a success flag.
+typedef std::function<void(bool success)> livoxProto1_destroyDeviceReqCbFn;
+typedef void livoxProto1_destroyDeviceReqFn(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_destroyDeviceReqCbFn> callback);
+
+// Completion-callback signature and function type of
+// livoxProto1_device_enablePcloudDataReq; the callback receives a success
+// flag.
+typedef std::function<void(bool success)>
+	livoxProto1_device_enablePcloudDataReqCbFn;
+typedef void livoxProto1_device_enablePcloudDataReqFn(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_enablePcloudDataReqCbFn> callback);
+
+// Completion-callback signature and function type of
+// livoxProto1_device_disablePcloudDataReq; the callback receives a success
+// flag.
+typedef std::function<void(bool success)>
+	livoxProto1_device_disablePcloudDataReqCbFn;
+typedef void livoxProto1_device_disablePcloudDataReqFn(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_disablePcloudDataReqCbFn> callback);
+
+// Completion-callback signature and function type of
+// livoxProto1_device_getReturnModeReq; the callback receives a success flag
+// and the return mode as a uint8_t.
+typedef std::function<void(bool success, uint8_t returnMode)>
+	livoxProto1_device_getReturnModeReqCbFn;
+typedef void livoxProto1_device_getReturnModeReqFn(
+	std::shared_ptr<livoxProto1::Device> device,
+	smo::Callback<livoxProto1_device_getReturnModeReqCbFn> callback);
+
+// Function type of livoxProto1_getPcloudDataFdDesc: takes no arguments and
+// returns a shared pointer to a boost::asio POSIX stream descriptor.
+typedef std::shared_ptr<boost::asio::posix::stream_descriptor>
+	livoxProto1_getPcloudDataFdDescFn(void);
+
+// Declarations of the exported entry points, one per function type above.
+livoxProto1_mainFn livoxProto1_main;
+livoxProto1_exitFn livoxProto1_exit;
+livoxProto1_getOrCreateDeviceReqFn livoxProto1_getOrCreateDeviceReq;
+livoxProto1_destroyDeviceReqFn livoxProto1_destroyDeviceReq;
+livoxProto1_device_enablePcloudDataReqFn livoxProto1_device_enablePcloudDataReq;
+livoxProto1_device_disablePcloudDataReqFn
+	livoxProto1_device_disablePcloudDataReq;
+livoxProto1_device_getReturnModeReqFn livoxProto1_device_getReturnModeReq;
+livoxProto1_getPcloudDataFdDescFn livoxProto1_getPcloudDataFdDesc;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LIVOXPROTO1_H
@@ -0,0 +1,854 @@
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <cstring>
+#include "protocol.h"
+
+namespace livoxProto1 {
+namespace comms {
+
+// Command methods
+void Command::swapToHostEndianness()
+{
+	/* Intentionally empty: nothing in Command is byte-swapped. */
+}
+
+void Command::swapToProtocolEndianness()
+{
+	/* Intentionally empty: nothing in Command is byte-swapped. */
+}
+
+bool Command::sanityCheck() const
+{
+	/* Placeholder: no per-command-set validation is performed yet, so every
+	 * Command is reported as sane. */
+	const bool sane = true;
+	return sane;
+}
+
+// Header methods
+void Header::swapToHostEndianness()
+{
+	// Byte-swap the three 16-bit fields, but only on a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		length = __builtin_bswap16(length);
+		seq_num = __builtin_bswap16(seq_num);
+		crc_16 = __builtin_bswap16(crc_16);
+	}
+}
+
+void Header::swapToProtocolEndianness()
+{
+	// The wire format is little-endian, so only a big-endian host has to
+	// byte-swap its three 16-bit fields before transmission.
+	if (!endian::isLittleEndian())
+	{
+		length = __builtin_bswap16(length);
+		seq_num = __builtin_bswap16(seq_num);
+		crc_16 = __builtin_bswap16(crc_16);
+	}
+}
+
+bool Header::sanityCheck() const
+{
+	// A header is accepted when it starts with 0xAA and carries version 1.
+	const bool sofOk = (sof == 0xAA);
+	const bool versionOk = (version == 1);
+	return sofOk && versionOk;
+}
+
+uint16_t Header::calculateCrc16() const
+{
+	// The checksum covers the first sizeof(Header) - sizeof(crc_16) raw bytes
+	// of this object (assumes crc_16 is the trailing field -- confirm against
+	// the struct definition).
+	const size_t coveredBytes = sizeof(Header) - sizeof(crc_16);
+	const uint8_t* rawHeader = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc16(rawHeader, coveredBytes);
+}
+
+bool Header::validateCrc16() const
+{
+	// Recompute the header checksum and compare it with the stored one.
+	const uint16_t expected = calculateCrc16();
+	if (expected == crc_16) { return true; }
+
+	// Mismatch: print both values in hex, then restore decimal formatting.
+	std::cout << "CRC16 Debug: calculated=0x" << std::hex << expected
+		<< ", received=0x" << crc_16 << std::dec << std::endl;
+	return false;
+}
+
+void Header::setCrc16FromRawBytes()
+{
+	// Store the checksum of the header's current raw bytes in crc_16.
+	const uint16_t fresh = calculateCrc16();
+	crc_16 = fresh;
+}
+
+void Header::swapCrc16ToHostEndianness()
+{
+	// Only crc_16 is touched, and only on a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		crc_16 = __builtin_bswap16(crc_16);
+	}
+}
+
+void Header::swapCrc16ToProtocolEndianness()
+{
+	// Only crc_16 is touched, and only on a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		crc_16 = __builtin_bswap16(crc_16);
+	}
+}
+
+// Footer methods
+void Footer::swapToHostEndianness()
+{
+	// Byte-swap the 32-bit CRC, but only on a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		crc_32 = __builtin_bswap32(crc_32);
+	}
+}
+
+void Footer::swapToProtocolEndianness()
+{
+	// The wire format is little-endian, so only a big-endian host has to
+	// byte-swap the 32-bit CRC.
+	if (!endian::isLittleEndian())
+	{
+		crc_32 = __builtin_bswap32(crc_32);
+	}
+}
+
+void Footer::swapCrc32ToHostEndianness()
+{
+	// Byte-swap crc_32 on a big-endian host; no-op otherwise.
+	if (!endian::isLittleEndian())
+	{
+		crc_32 = __builtin_bswap32(crc_32);
+	}
+}
+
+void Footer::swapCrc32ToProtocolEndianness()
+{
+	// Byte-swap crc_32 on a big-endian host; no-op otherwise.
+	if (!endian::isLittleEndian())
+	{
+		crc_32 = __builtin_bswap32(crc_32);
+	}
+}
+
+bool Footer::validateCrc32() const
+{
+	/* Stub: the footer on its own cannot see the message content the CRC32
+	 * covers, so no check is made here and the result is always true. Per the
+	 * original note, the real validation happens on the raw bytes in the
+	 * receiving flow before the struct is constructed. */
+	const bool accepted = true;
+	return accepted;
+}
+
+bool Footer::sanityCheck() const
+{
+	/**		FIXME:
+	 * No CRC validation is done here yet; every footer is reported as sane.
+	 */
+	const bool sane = true;
+	return sane;
+}
+
+// BroadcastMessage methods
+void BroadcastMessage::swapContentsToHostEndianness()
+{
+	// Everything below only applies to a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		header.swapToHostEndianness();
+		command.swapToHostEndianness();
+		reserved = __builtin_bswap16(reserved);
+		// The footer is deliberately left alone: footer.swapToHostEndianness()
+		// would swap the CRC32.
+	}
+}
+
+
+bool BroadcastMessage::sanityCheck() const
+{
+	// Checks run in the same order as a short-circuiting && chain would.
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+
+	// A broadcast must carry cmd_set 0x00 / cmd_id 0x00 with cmd_type 0x02.
+	if (command.cmd_set != 0x00) { return false; }
+	if (command.cmd_id != 0x00) { return false; }
+	if (header.cmd_type != 0x02) { return false; }
+
+	return footer.sanityCheck();
+}
+
+bool BroadcastMessage::validateCrc32() const
+{
+	// The CRC32 covers the raw bytes of the whole message minus the size of
+	// the footer's crc_32 field.
+	const size_t coveredBytes = sizeof(BroadcastMessage) - sizeof(footer.crc_32);
+	const uint8_t* rawMessage = reinterpret_cast<const uint8_t*>(this);
+	const uint32_t expected = comms::calculateCrc32(rawMessage, coveredBytes);
+
+	if (expected == footer.crc_32) { return true; }
+
+	// Mismatch: print both values in hex, then restore decimal formatting.
+	std::cout << "BroadcastMessage CRC32 Debug: calculated=0x" << std::hex << expected
+		<< ", received=0x" << footer.crc_32 << std::dec << std::endl;
+	return false;
+}
+
+// HandshakeRequest methods
+/**
+ * Build a handshake request (cmd_set 0x00, cmd_id 0x01) announcing the
+ * host's IP address and its data / command / IMU ports.
+ *
+ * @param hostIP   Dotted-quad IPv4 address of the host. At most the first
+ *                 four dot-separated tokens are used; octets not supplied
+ *                 are sent as 0.
+ * @param dataPort Value stored in data_port.
+ * @param cmdPort  Value stored in cmd_port.
+ * @param imuPort  Value stored in imu_port.
+ *
+ * std::stoi throws std::invalid_argument / std::out_of_range when a token
+ * is not a number that fits in an int; those exceptions propagate.
+ * NOTE(review): a numeric token outside 0..255 is silently truncated by the
+ * uint8_t cast.
+ */
+HandshakeRequest::HandshakeRequest(
+	const std::string& hostIP,
+	uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort
+	)
+{
+	// Initialize header
+	header.sof = 0xAA;
+	header.version = 1;
+	header.length = sizeof(HandshakeRequest);
+	header.cmd_type = 0x00;  // CMD (request)
+	header.seq_num = 1;      // Sequence number
+	header.crc_16 = 0;       // Will be calculated later
+
+	// Initialize command
+	command.cmd_set = 0x00;  // General Command Set
+	command.cmd_id = 0x01;   // Handshake Command
+
+	// Zero all four octets first, so that a hostIP with fewer than four
+	// dot-separated tokens (including the empty string) cannot leave
+	// indeterminate bytes in the packet and in the CRC computed over it.
+	for (int octet = 0; octet < 4; ++octet)
+	{
+		user_ip[octet] = 0;
+	}
+
+	// Parse host IP address
+	std::istringstream iss(hostIP);
+	std::string token;
+	int i = 0;
+	while (i < 4 && std::getline(iss, token, '.'))
+	{
+		user_ip[i] = static_cast<uint8_t>(std::stoi(token));
+		i++;
+	}
+
+	// Set ports
+	this->data_port = dataPort;
+	this->cmd_port = cmdPort;
+	this->imu_port = imuPort;
+
+	// Initialize footer
+	footer.crc_32 = 0;  // Will be calculated later
+}
+
+uint32_t HandshakeRequest::calculateCrc32() const
+{
+	// The CRC32 covers the raw bytes of the whole message minus the size of
+	// the footer's crc_32 field.
+	const size_t coveredBytes = sizeof(HandshakeRequest) - sizeof(footer.crc_32);
+	const uint8_t* rawMessage = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(rawMessage, coveredBytes);
+}
+
+/**
+ * Convert the multi-byte content fields from host to protocol
+ * (little-endian) byte order before transmission.
+ *
+ * No-op on a little-endian host. On a big-endian host the header, the
+ * command and the three port fields are byte-swapped; the footer is left
+ * untouched. Header::swapToProtocolEndianness() also byte-swaps
+ * header.crc_16.
+ */
+void HandshakeRequest::swapContentsToProtocolEndianness()
+{
+	// Protocol uses little-endian, so on little-endian machines, no swap needed
+	if (endian::isLittleEndian()) { return; }
+
+	// Use the host->protocol helpers, like the other outgoing messages do.
+	// The byte swap is the same one the host-direction helpers perform; only
+	// the stated direction differs.
+	header.swapToProtocolEndianness();
+	command.swapToProtocolEndianness();
+	data_port = __builtin_bswap16(data_port);
+	cmd_port = __builtin_bswap16(cmd_port);
+	imu_port = __builtin_bswap16(imu_port);
+	// Note: footer.swapToProtocolEndianness() swaps CRC, so we skip it here
+}
+
+// HandshakeResponse methods
+void HandshakeResponse::swapContentsToHostEndianness()
+{
+	// Everything below only applies to a big-endian host.
+	if (!endian::isLittleEndian())
+	{
+		header.swapToHostEndianness();
+		command.swapToHostEndianness();
+		// The footer is deliberately left alone: footer.swapToHostEndianness()
+		// would swap the CRC32.
+	}
+}
+
+
+bool HandshakeResponse::sanityCheck() const
+{
+	// Checks run in the same order as a short-circuiting && chain would.
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+
+	// A handshake response must carry cmd_set 0x00 / cmd_id 0x01.
+	if (command.cmd_set != 0x00) { return false; }
+	if (command.cmd_id != 0x01) { return false; }
+
+	return footer.sanityCheck();
+}
+
+bool HandshakeResponse::validateCrc32() const
+{
+	// The CRC32 covers the raw bytes of the whole message minus the size of
+	// the footer's crc_32 field.
+	const size_t coveredBytes = sizeof(HandshakeResponse) - sizeof(footer.crc_32);
+	const uint8_t* rawMessage = reinterpret_cast<const uint8_t*>(this);
+	const uint32_t expected = comms::calculateCrc32(rawMessage, coveredBytes);
+
+	if (expected == footer.crc_32) { return true; }
+
+	// Mismatch: print both values in hex, then restore decimal formatting.
+	std::cout << "HandshakeResponse CRC32 Debug: calculated=0x" << std::hex << expected
+		<< ", received=0x" << footer.crc_32 << std::dec << std::endl;
+	return false;
+}
+
+// Standalone CRC16 calculation utility
+/**
+ * Table-driven, byte-at-a-time CRC16 over `length` bytes starting at `data`,
+ * seeded with LIVOX_CRC16_SEED.
+ *
+ * @param data   Bytes to checksum; not dereferenced when length is 0.
+ * @param length Number of bytes to process.
+ * @return The final 16-bit CRC register value (no output XOR is applied).
+ */
+uint16_t calculateCrc16(const uint8_t* data, size_t length)
+{
+	/**		EXPLANATION:
+	 * 256-entry lookup table for the byte-wise algorithm below. Per the
+	 * original note it follows the MCRF4XX table of the FastCRC library
+	 * used by the Livox SDK.
+	 *
+	 * The table used to be declared with 1024 entries (these same 256
+	 * values repeated four times), but the index computed below is always
+	 * in 0..255, so the extra entries were unreachable and were dropped.
+	 */
+	static const uint16_t crc_table_mcrf4xx[256] = {
+		0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+		0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+		0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+		0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+		0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+		0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+		0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+		0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+		0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+		0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+		0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+		0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+		0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+		0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+		0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+		0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+		0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+		0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+		0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+		0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+		0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+		0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+		0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+		0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+		0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+		0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+		0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+		0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+		0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+		0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+		0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+		0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+	};
+
+	// Livox SDK seed
+	uint16_t crc = LIVOX_CRC16_SEED;
+
+	// One table lookup per input byte: the low byte of the running CRC is
+	// XORed with the input byte, giving an index in 0..255.
+	for (size_t i = 0; i < length; ++i) {
+		crc = (crc >> 8) ^ crc_table_mcrf4xx[(crc & 0xff) ^ data[i]];
+	}
+
+	return crc;
+}
+
+// Standalone CRC32 calculation utility
+/**
+ * Table-driven, byte-at-a-time CRC32 over `length` bytes starting at `data`.
+ * The register starts at LIVOX_CRC32_SEED ^ 0xffffffff and the result is
+ * XORed with 0xffffffff before being returned.
+ */
+uint32_t calculateCrc32(const uint8_t* data, size_t length)
+{
+	/**		EXPLANATION:
+	 * 256-entry lookup table. Per the original note it is taken from the
+	 * FastCRC library used by the Livox SDK.
+	 */
+	static const uint32_t crc_table_crc32[256] = {
+		0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+		0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+		0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+		0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+		0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+		0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+		0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+		0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+		0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+		0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+		0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+		0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+		0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+		0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+		0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+		0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+		0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+		0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+		0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+		0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+		0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+		0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+		0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+		0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+		0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+		0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+		0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+		0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+		0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+		0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+		0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+		0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+		0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+		0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+		0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+		0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+		0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+		0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+		0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+		0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+		0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+		0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+		0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+		0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+		0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+		0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+		0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+		0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+		0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+		0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+		0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+		0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+		0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+		0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+		0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+		0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+		0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+		0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+		0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+		0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+		0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+		0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+		0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+		0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+	};
+
+	// Running register: the Livox seed with all bits inverted.
+	uint32_t crc = LIVOX_CRC32_SEED ^ 0xffffffff;
+
+	// Consume the input one byte at a time, one table lookup per byte.
+	const uint8_t* cursor = data;
+	for (size_t remaining = length; remaining != 0; --remaining) {
+		const uint8_t inputByte = *cursor;
+		++cursor;
+		crc = (crc >> 8) ^ crc_table_crc32[(crc & 0xff) ^ inputByte];
+	}
+
+	// Final inversion.
+	return crc ^ 0xffffffff;
+}
+
+// IP address parsing utility
+/**
+ * Split a dotted-quad IPv4 address into its four octet strings.
+ *
+ * @param ipAddress Text of the form "a.b.c.d".
+ * @return The four octets on success; std::nullopt when the text does not
+ *         consist of exactly four dot-separated decimal numbers, each made
+ *         of 1 to 3 digits and no larger than 255.
+ *
+ * Inputs with an empty or non-numeric octet, an octet above 255, or extra
+ * text after the fourth octet ("1.2.3.4.5", "1.2.3.4.") used to be
+ * accepted and are now rejected.
+ */
+std::optional<IPOctets> parseIPv4Address(const std::string& ipAddress)
+{
+	// True when `text` is 1..3 decimal digits with a value of at most 255.
+	auto isValidOctet = [](const std::string& text) -> bool
+	{
+		if (text.empty() || text.size() > 3) { return false; }
+
+		int value = 0;
+		for (char ch : text)
+		{
+			if (ch < '0' || ch > '9') { return false; }
+			value = (value * 10) + (ch - '0');
+		}
+		return value <= 255;
+	};
+
+	IPOctets result;
+
+	std::istringstream iss(ipAddress);
+	if (!(std::getline(iss, result.octet1, '.') &&
+		std::getline(iss, result.octet2, '.') &&
+		std::getline(iss, result.octet3, '.') &&
+		std::getline(iss, result.octet4, '.')))
+	{
+		return std::nullopt;
+	}
+
+	// The fourth read must have stopped at end of input. If it stopped at a
+	// '.' instead, there is trailing text after the fourth octet.
+	if (!iss.eof()) { return std::nullopt; }
+
+	if (!isValidOctet(result.octet1) || !isValidOctet(result.octet2) ||
+		!isValidOctet(result.octet3) || !isValidOctet(result.octet4))
+	{
+		return std::nullopt;
+	}
+
+	return result;
+}
+
+// HeartbeatMessage methods
+HeartbeatMessage::HeartbeatMessage()
+{
+	// Command: general command set, heartbeat id.
+	command.cmd_set = 0x00; // kCommandSetGeneral
+	command.cmd_id = 0x03;  // kCommandIDGeneralHeartbeat
+
+	// Header: fixed start byte and version; the message is a CMD whose
+	// length is header + command + footer.
+	header.sof = 0xAA;
+	header.version = 0x01;
+	header.cmd_type = 0x00; // kCommandTypeCmd
+	header.seq_num = 0x0001;
+	header.length = sizeof(Header) + sizeof(Command) + sizeof(Footer);
+
+	// Both checksums start at zero; they are expected to be filled in
+	// before the message is sent.
+	header.crc_16 = 0;
+	footer.crc_32 = 0;
+}
+
+uint32_t HeartbeatMessage::calculateCrc32() const
+{
+	// The CRC32 covers the raw bytes of the whole message minus the size of
+	// the footer's crc_32 field.
+	const size_t coveredBytes = sizeof(HeartbeatMessage) - sizeof(footer.crc_32);
+	const uint8_t* rawMessage = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(rawMessage, coveredBytes);
+}
+
+void HeartbeatMessage::swapContentsToProtocolEndianness()
+{
+	// The wire format is little-endian, so only a big-endian host needs to
+	// convert anything.
+	if (!endian::isLittleEndian())
+	{
+		header.swapToProtocolEndianness();
+		command.swapToProtocolEndianness();
+		// The footer is deliberately left alone:
+		// footer.swapToProtocolEndianness() would swap the CRC32.
+	}
+}
+
+// DisconnectMessage methods
+DisconnectMessage::DisconnectMessage()
+{
+	// Command: general command set, disconnect id.
+	command.cmd_set = 0x00; // kCommandSetGeneral
+	command.cmd_id = 0x06;  // kCommandIDGeneralDisconnect
+
+	// Header: fixed start byte and version; the message is a CMD whose
+	// length is header + command + footer.
+	header.sof = 0xAA;
+	header.version = 0x01;
+	header.cmd_type = 0x00; // kCommandTypeCmd
+	header.seq_num = 0x0001;
+	header.length = sizeof(Header) + sizeof(Command) + sizeof(Footer);
+
+	// Both checksums start at zero; they are expected to be filled in
+	// before the message is sent.
+	header.crc_16 = 0;
+	footer.crc_32 = 0;
+}
+
+uint32_t DisconnectMessage::calculateCrc32() const
+{
+	// The CRC32 covers the raw bytes of the whole message minus the size of
+	// the footer's crc_32 field.
+	const size_t coveredBytes = sizeof(DisconnectMessage) - sizeof(footer.crc_32);
+	const uint8_t* rawMessage = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(rawMessage, coveredBytes);
+}
+
+void DisconnectMessage::swapContentsToProtocolEndianness()
+{
+	// The wire format is little-endian, so only a big-endian host needs to
+	// convert anything.
+	if (!endian::isLittleEndian())
+	{
+		header.swapToProtocolEndianness();
+		command.swapToProtocolEndianness();
+		// The footer is deliberately left alone:
+		// footer.swapToProtocolEndianness() would swap the CRC32.
+	}
+}
+
+// StartStopSamplingMessage methods
+/**
+ * Build a start/stop sampling request (cmd_set 0x00, cmd_id 0x04).
+ *
+ * The `enable` byte defaults to 0x00 (stop) and seq_num to 0; the caller is
+ * expected to override them as needed. Both CRC fields start at zero.
+ */
+StartStopSamplingMessage::StartStopSamplingMessage()
+{
+	// Initialize header
+	header.sof = 0xAA;
+	header.version = 1;
+	header.length = sizeof(StartStopSamplingMessage);
+	// This is a request that is answered with an ACK, so it is sent as
+	// CMD (0x00) like the handshake, heartbeat and disconnect requests in
+	// this file. It was previously sent as 0x02, which is the MSG type the
+	// device uses for its broadcast push messages.
+	header.cmd_type = 0x00; // kCommandTypeCmd
+	header.seq_num = 0; // Will be set by caller if needed
+	header.crc_16 = 0; // Will be calculated
+
+	// Initialize command
+	command.cmd_set = 0x00; // General command set
+	command.cmd_id = 0x04;  // Sampling command ID
+
+	// Initialize data - enable flag will be set manually by caller
+	enable = 0x00; // Default to stop, caller will override
+
+	// Initialize footer
+	footer.crc_32 = 0; // Will be calculated
+}
+
+uint32_t StartStopSamplingMessage::calculateCrc32() const
+{
+	// Checksum every byte of the frame except the trailing footer.crc_32.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(frameStart, coveredLength);
+}
+
+void StartStopSamplingMessage::swapContentsToProtocolEndianness()
+{
+	// The protocol is little-endian, so a little-endian host must not
+	// byte-swap anything. This guard matches the other request frames
+	// in this file (e.g. DisconnectMessage).
+	if (endian::isLittleEndian()) { return; }
+
+	// Big-endian host: convert the content fields. `enable` is a single
+	// byte and needs no conversion; the footer CRC is not touched here.
+	header.swapToProtocolEndianness();
+	command.swapToProtocolEndianness();
+}
+
+// SamplingResponse methods
+void SamplingResponse::swapContentsToHostEndianness()
+{
+	// The protocol is little-endian, so a little-endian host already
+	// holds every field in host order. This guard matches the other
+	// response frames in this file (e.g. HeartbeatACK).
+	if (endian::isLittleEndian()) { return; }
+
+	// Big-endian host: convert header, command and footer.
+	// `ret_code` is a single byte and needs no conversion.
+	header.swapToHostEndianness();
+	command.swapToHostEndianness();
+	footer.swapToHostEndianness();
+}
+
+bool SamplingResponse::sanityCheck() const
+{
+	// Checked in header, command, footer order; stops at the first failure.
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+	return footer.sanityCheck();
+}
+
+bool SamplingResponse::validateCrc32() const
+{
+	// Recompute the checksum over everything except the footer CRC32 field.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+	const uint32_t computedCrc = comms::calculateCrc32(frameStart, coveredLength);
+
+	if (computedCrc == footer.crc_32) { return true; }
+
+	// Mismatch: print both values in hex to help diagnose the frame.
+	std::cout << "SamplingResponse CRC32 Debug: calculated=0x"
+		<< std::hex << computedCrc
+		<< ", received=0x" << footer.crc_32 << std::dec << std::endl;
+
+	return false;
+}
+
+// HeartbeatACK methods
+void HeartbeatACK::swapContentsToHostEndianness()
+{
+	// Only a big-endian host has anything to convert.
+	if (!endian::isLittleEndian()) {
+		// Content fields only. The footer is skipped because
+		// footer.swapToHostEndianness() would swap the CRC.
+		header.swapToHostEndianness();
+		command.swapToHostEndianness();
+		ack_msg = __builtin_bswap32(ack_msg);
+	}
+}
+
+bool HeartbeatACK::sanityCheck() const
+{
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+
+	// A heartbeat ACK must carry command set 0x00 and command ID 0x03.
+	if (command.cmd_set != 0x00) { return false; }
+	if (command.cmd_id != 0x03) { return false; }
+
+	return footer.sanityCheck();
+}
+
+bool HeartbeatACK::validateCrc32() const
+{
+	// Recompute the checksum over everything except footer.crc_32.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+	const uint32_t computedCrc = comms::calculateCrc32(frameStart, coveredLength);
+
+	if (computedCrc == footer.crc_32) { return true; }
+
+	// Mismatch: print both values in hex to help diagnose the frame.
+	std::cout << "HeartbeatACK CRC32 Debug: calculated=0x" << std::hex << computedCrc
+		<< ", received=0x" << footer.crc_32 << std::dec << std::endl;
+
+	return false;
+}
+
+// SetLiDARReturnMode methods
+SetLiDARReturnMode::SetLiDARReturnMode()
+{
+	// Initialize header. Every field is assigned explicitly: Header has no
+	// default member initializers, so anything skipped here would go out on
+	// the wire (and into the CRCs) with an indeterminate value.
+	header.sof = 0xAA;
+	header.version = 0x01;
+	header.length = sizeof(SetLiDARReturnMode);
+	header.cmd_type = 0x00; // CMD type (same value DisconnectMessage uses)
+	header.seq_num = 0; // Caller may override before sending
+	header.crc_16 = 0; // Will be calculated later
+
+	// Initialize command
+	command.cmd_set = 0x01; // LiDAR Command
+	command.cmd_id = 0x06;  // Set LiDAR Return Mode
+
+	// Initialize mode (default to Single Return First)
+	mode = 0x00;
+
+	// Initialize footer
+	footer.crc_32 = 0; // Will be calculated later
+}
+
+uint32_t SetLiDARReturnMode::calculateCrc32() const
+{
+	// Checksum every byte of the frame except the trailing footer.crc_32.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(frameStart, coveredLength);
+}
+
+void SetLiDARReturnMode::swapContentsToProtocolEndianness()
+{
+	// Nothing to convert on a little-endian host.
+	if (!endian::isLittleEndian()) {
+		header.swapToProtocolEndianness();
+		command.swapToProtocolEndianness();
+		// `mode` is a single byte and has no byte order.
+		footer.swapToProtocolEndianness();
+	}
+}
+
+// SetLiDARReturnModeResponse methods
+void SetLiDARReturnModeResponse::swapContentsToHostEndianness()
+{
+	// Only a big-endian host has anything to convert.
+	if (!endian::isLittleEndian()) {
+		// `ret_code` is a single byte; the footer is skipped because
+		// footer.swapToHostEndianness() would swap the CRC.
+		header.swapToHostEndianness();
+		command.swapToHostEndianness();
+	}
+}
+
+bool SetLiDARReturnModeResponse::sanityCheck() const
+{
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+
+	// Must be the reply to "Set LiDAR Return Mode": set 0x01, ID 0x06.
+	if (command.cmd_set != 0x01) { return false; }
+	if (command.cmd_id != 0x06) { return false; }
+
+	// Valid return codes: 0x00-0x01
+	if (ret_code > 0x01) { return false; }
+
+	return footer.sanityCheck();
+}
+
+bool SetLiDARReturnModeResponse::validateCrc32() const
+{
+	// Recompute the checksum over everything except footer.crc_32 and
+	// compare it with the value stored in the footer.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+
+	return footer.crc_32 == comms::calculateCrc32(frameStart, coveredLength);
+}
+
+// GetLiDARReturnMode methods
+GetLiDARReturnMode::GetLiDARReturnMode()
+{
+	// Initialize header. Every field is assigned explicitly: Header has no
+	// default member initializers, so anything skipped here would go out on
+	// the wire (and into the CRCs) with an indeterminate value.
+	header.sof = 0xAA;
+	header.version = 0x01;
+	header.length = sizeof(GetLiDARReturnMode);
+	header.cmd_type = 0x00; // CMD type (same value DisconnectMessage uses)
+	header.seq_num = 0; // Caller may override before sending
+	header.crc_16 = 0; // Will be calculated later
+
+	// Initialize command
+	command.cmd_set = 0x01; // LiDAR Command
+	command.cmd_id = 0x07;  // Get LiDAR Return Mode
+
+	// Initialize footer
+	footer.crc_32 = 0; // Will be calculated later
+}
+
+uint32_t GetLiDARReturnMode::calculateCrc32() const
+{
+	// Checksum every byte of the frame except the trailing footer.crc_32.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+
+	return comms::calculateCrc32(frameStart, coveredLength);
+}
+
+void GetLiDARReturnMode::swapContentsToProtocolEndianness()
+{
+	// Nothing to convert on a little-endian host.
+	if (!endian::isLittleEndian()) {
+		header.swapToProtocolEndianness();
+		command.swapToProtocolEndianness();
+		footer.swapToProtocolEndianness();
+	}
+}
+
+// GetLiDARReturnModeResponse methods
+void GetLiDARReturnModeResponse::swapContentsToHostEndianness()
+{
+	// Only a big-endian host has anything to convert.
+	if (!endian::isLittleEndian()) {
+		// `ret_code` and `mode` are single bytes; the footer is skipped
+		// because footer.swapToHostEndianness() would swap the CRC.
+		header.swapToHostEndianness();
+		command.swapToHostEndianness();
+	}
+}
+
+bool GetLiDARReturnModeResponse::sanityCheck() const
+{
+	if (!header.sanityCheck()) { return false; }
+	if (!command.sanityCheck()) { return false; }
+
+	// Must be the reply to "Get LiDAR Return Mode": set 0x01, ID 0x07.
+	if (command.cmd_set != 0x01) { return false; }
+	if (command.cmd_id != 0x07) { return false; }
+
+	// Valid return codes: 0x00-0x01
+	if (ret_code > 0x01) { return false; }
+	// Valid modes: 0x00-0x03
+	if (mode > 0x03) { return false; }
+
+	return footer.sanityCheck();
+}
+
+bool GetLiDARReturnModeResponse::validateCrc32() const
+{
+	// Recompute the checksum over everything except footer.crc_32 and
+	// compare it with the value stored in the footer.
+	const size_t coveredLength = sizeof(*this) - sizeof(footer.crc_32);
+	const uint8_t* frameStart = reinterpret_cast<const uint8_t*>(this);
+
+	return footer.crc_32 == comms::calculateCrc32(frameStart, coveredLength);
+}
+
+} // namespace comms
+} // namespace livoxProto1
@@ -0,0 +1,361 @@
+#ifndef LIVOXPROTO1_PROTOCOL_H
+#define LIVOXPROTO1_PROTOCOL_H
+
+#include <boostAsioLinkageFix.h>
+#include <vector>
+#include <string>
+#include <memory>
+#include <sstream>
+#include <atomic>
+#include <cstdint>
+#include <optional>
+#include <boost/asio/ip/address_v4.hpp>
+#include <user/senseApiDesc.h>
+
+namespace livoxProto1 {
+namespace comms {
+
+/**		EXPLANATION:
+ * Undocumented Livox SDK CRC seed constants. These were found in the Livox SDK
+ * source code.
+ */
+constexpr uint16_t LIVOX_CRC16_SEED = 0x4c49;
+constexpr uint32_t LIVOX_CRC32_SEED = 0x564f580a;
+
+// Endianness detection
+namespace endian {
+	// Reports whether the host stores the least-significant byte of a
+	// multi-byte integer at the lowest address.
+	inline bool isLittleEndian() {
+		const uint32_t probe = 0x01020304;
+		// Look at the byte stored at the lowest address of the probe.
+		const unsigned char lowestByte =
+			*reinterpret_cast<const unsigned char*>(&probe);
+		return lowestByte == 0x04;
+	}
+}
+
+// IPv4 address validation
+inline bool isValidIPv4(const std::string& ipAddress) {
+	// from_string reports a parse failure through the error_code instead
+	// of throwing; the parsed address itself is not needed.
+	boost::system::error_code parseError;
+	boost::asio::ip::address_v4::from_string(ipAddress, parseError);
+	if (parseError) { return false; }
+	return true;
+}
+
+// CRC calculation utilities
+uint16_t calculateCrc16(
+	const uint8_t* data, size_t length);
+uint32_t calculateCrc32(
+	const uint8_t* data, size_t length);
+
+// IP address parsing utility
+struct IPOctets {
+	// The four components of an IPv4 address, each kept as text.
+	// NOTE(review): presumably octet1 is the leftmost component; confirm
+	// against parseIPv4Address().
+	std::string octet1;
+	std::string octet2;
+	std::string octet3;
+	std::string octet4;
+};
+
+std::optional<IPOctets> parseIPv4Address(const std::string& ipAddress);
+
+// Device identifier comparison
+inline bool deviceIdentifiersEqual(
+	const std::string& id1, const std::string& id2
+	)
+{
+	// An identifier is either a 14-character serial or a 15-character
+	// broadcast code. Any other length never matches.
+	const std::string::size_type length1 = id1.length();
+	const std::string::size_type length2 = id2.length();
+
+	if (length1 != 14 && length1 != 15) { return false; }
+	if (length2 != 14 && length2 != 15) { return false; }
+
+	// Both identifiers hold at least 14 characters here, so compare the
+	// leading 14-character serial portions in place, without copying.
+	return id1.compare(0, 14, id2, 0, 14) == 0;
+}
+
+/**		EXPLANATION:
+ * Device types as defined in the Livox protocol specification
+ */
+// Stored as a single byte (underlying type uint8_t).
+// Values 4 and 5 are not assigned in this enum.
+enum class DeviceType : uint8_t {
+	Hub = 0,
+	Mid40 = 1,
+	Tele15 = 2,
+	Horizon = 3,
+	Mid70 = 6,
+	Avia = 7
+};
+
+/**		EXPLANATION:
+ * Protocol frame header structure.
+ * All multi-byte fields are in little-endian format as per protocol spec.
+ */
+// Packed, so the members occupy exactly the byte offsets noted below:
+// 9 bytes in total (1 + 1 + 2 + 1 + 2 + 2).
+struct Header
+{
+	uint8_t sof;           // 0: Start of Frame (0xAA)
+	uint8_t version;       // 1: Protocol Version (1)
+	uint16_t length;       // 2-3: Frame Length (little-endian)
+	uint8_t cmd_type;      // 4: Command Type (0x02 = MSG for broadcast)
+	uint16_t seq_num;      // 5-6: Sequence Number (little-endian)
+	uint16_t crc_16;       // 7-8: Header Checksum (little-endian)
+
+	// Byte-order conversion helpers. The CRC16 field has its own pair of
+	// swap functions, separate from the whole-header pair.
+	void swapToHostEndianness();
+	void swapToProtocolEndianness();
+	void swapCrc16ToHostEndianness();
+	void swapCrc16ToProtocolEndianness();
+	// Validation and checksum helpers (defined in the matching .cpp).
+	bool sanityCheck() const;
+	uint16_t calculateCrc16() const;
+	bool validateCrc16() const;
+	void setCrc16FromRawBytes();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Protocol frame footer structure.
+ * All multi-byte fields are in little-endian format as per protocol spec.
+ */
+// Packed; 4 bytes in total. Placed as the last member of every frame
+// struct declared in this header.
+struct Footer
+{
+	uint32_t crc_32;       // 0-3: Whole Frame Checksum (little-endian)
+
+	// Byte-order conversion helpers. Callers in the .cpp note that the
+	// whole-footer swap functions swap the CRC.
+	void swapToHostEndianness();
+	void swapToProtocolEndianness();
+	void swapCrc32ToHostEndianness();
+	void swapCrc32ToProtocolEndianness();
+	bool validateCrc32() const;
+	bool sanityCheck() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Command identification structure used in all Livox protocol messages.
+ * Contains the command set and command ID fields.
+ */
+// Packed; 2 bytes in total. Both members are single bytes.
+struct Command
+{
+	uint8_t cmd_set;		// 0: Command Set (0x00 = General, etc.)
+	uint8_t cmd_id;		// 1: Command ID (0x00 = Broadcast, 0x01 = Handshake, etc.)
+
+	void swapToHostEndianness();
+	void swapToProtocolEndianness();
+	bool sanityCheck() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete wire format for Livox broadcast messages.
+ * All multi-byte fields are in little-endian format as per protocol spec.
+ */
+// Packed; 34 bytes in total (9 header + 2 command + 16 + 1 + 2 + 4 footer).
+struct BroadcastMessage
+{
+	Header header;         // 0-8: Protocol frame header
+	Command command;       // 9-10: Command identification
+	uint8_t broadcast_code[16]; // 11-26: Device Broadcast Code (null-terminated string)
+	uint8_t dev_type;      // 27: Device Type
+	uint16_t reserved;     // 28-29: Reserved (little-endian)
+	Footer footer;         // 30-33: Protocol frame footer
+
+	// Receive-side helpers: convert to host byte order, then validate.
+	void swapContentsToHostEndianness();
+	bool sanityCheck() const;
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete handshake request frame for connecting to Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 25 bytes in total (9 header + 2 command + 4 + 2 + 2 + 2 + 4 footer).
+struct HandshakeRequest
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t user_ip[4];		// 11-14: Host IP Address (little-endian)
+	uint16_t data_port;		// 15-16: Host Point Cloud Data UDP Destination Port (little-endian)
+	uint16_t cmd_port;		// 17-18: Host Control Command UDP Destination Port (little-endian)
+	uint16_t imu_port;		// 19-20: Host IMU UDP Destination Port (little-endian)
+	Footer footer;			// 21-24: Protocol frame footer
+
+	// Builds a request from the host IP (as text) and the three
+	// destination ports the device should send to.
+	HandshakeRequest(
+		const std::string& hostIP,
+		uint16_t dataPort, uint16_t cmdPort, uint16_t imuPort);
+
+	// Calculate CRC32 for the entire message
+	uint32_t calculateCrc32() const;
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete handshake response frame from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 16 bytes in total (9 header + 2 command + 1 + 4 footer).
+struct HandshakeResponse
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t ret_code;		// 11: Return Code (0x00 = Success, 0x01 = Fail)
+	Footer footer;			// 12-15: Protocol frame footer
+
+	// Receive-side helpers: convert to host byte order, then validate.
+	void swapContentsToHostEndianness();
+	bool sanityCheck() const;
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete heartbeat command frame for maintaining connection with Livox devices.
+ * This is the complete wire format including header, command fields, and footer.
+ */
+// Packed; 15 bytes in total (9 header + 2 command + 4 footer). Carries no
+// payload between the command and the footer.
+struct HeartbeatMessage
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	Footer footer;			// 11-14: Protocol frame footer
+
+	HeartbeatMessage();
+	uint32_t calculateCrc32() const;
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete disconnect command frame for disconnecting from Livox devices.
+ * This is the complete wire format including header, command fields, and footer.
+ */
+// Packed; 15 bytes in total (9 header + 2 command + 4 footer). Carries no
+// payload between the command and the footer.
+struct DisconnectMessage
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	Footer footer;			// 11-14: Protocol frame footer
+
+	DisconnectMessage();
+	// CRC32 over the whole frame except footer.crc_32.
+	uint32_t calculateCrc32() const;
+	// Swaps header and command on big-endian hosts; the footer is left untouched.
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete start/stop sampling command frame for enabling/disabling point cloud data from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 16 bytes in total (9 header + 2 command + 1 + 4 footer).
+struct StartStopSamplingMessage
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t enable;		// 11: Enable flag (0x01 = Start, 0x00 = Stop)
+	Footer footer;			// 12-15: Protocol frame footer
+
+	// The constructor leaves `enable` at 0x00 (stop); callers set it to
+	// 0x01 to start sampling.
+	StartStopSamplingMessage();
+	// CRC32 over the whole frame except footer.crc_32.
+	uint32_t calculateCrc32() const;
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete sampling response frame from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 16 bytes in total (9 header + 2 command + 1 + 4 footer).
+struct SamplingResponse
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t ret_code;		// 11: Return Code (0x00 = Success, 0x01 = Fail)
+	Footer footer;			// 12-15: Protocol frame footer
+
+	void swapContentsToHostEndianness();
+	// True when the header, command and footer sanity checks all pass.
+	bool sanityCheck() const;
+	// Compares a CRC32 computed over the frame (minus footer.crc_32) with
+	// footer.crc_32; prints both values to stdout on mismatch.
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete heartbeat ACK response frame from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 22 bytes in total (9 header + 2 command + 1 + 1 + 1 + 4 + 4 footer).
+struct HeartbeatACK
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t ret_code;		// 11: Return Code (0x00 = Success, 0x01 = Fail)
+	uint8_t work_state;		// 12: LiDAR/Hub State (0x00: Initializing, 0x01: Normal, 0x02: Power-Saving, 0x03: Standby, 0x04: Error)
+	uint8_t feature_msg;	// 13: LiDAR Feature Message (Bit0: Rain/Fog Suppression Switch)
+	uint32_t ack_msg;		// 14-17: ACK Message (Initialization Progress or Status Code)
+	Footer footer;			// 18-21: Protocol frame footer
+
+	// On big-endian hosts swaps header, command and ack_msg; the footer
+	// is left untouched.
+	void swapContentsToHostEndianness();
+	// Also requires cmd_set == 0x00 and cmd_id == 0x03.
+	bool sanityCheck() const;
+	// Prints the calculated and received CRC to stdout on mismatch.
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete set LiDAR return mode command frame for Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 16 bytes in total (9 header + 2 command + 1 + 4 footer).
+struct SetLiDARReturnMode
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t mode;			// 11: Return Mode (0x00: Single Return First, 0x01: Single Return Strongest, 0x02: Dual Return, 0x03: Triple Return)
+	Footer footer;			// 12-15: Protocol frame footer
+
+	// The constructor sets cmd_set 0x01 / cmd_id 0x06 and leaves `mode`
+	// at 0x00.
+	SetLiDARReturnMode();
+	// CRC32 over the whole frame except footer.crc_32.
+	uint32_t calculateCrc32() const;
+	// On big-endian hosts swaps header, command and footer.
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete set LiDAR return mode response frame from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 16 bytes in total (9 header + 2 command + 1 + 4 footer).
+struct SetLiDARReturnModeResponse
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t ret_code;		// 11: Return Code (0x00 = Success, 0x01 = Fail)
+	Footer footer;			// 12-15: Protocol frame footer
+
+	// On big-endian hosts swaps header and command; the footer is left
+	// untouched.
+	void swapContentsToHostEndianness();
+	// Also requires cmd_set == 0x01, cmd_id == 0x06 and ret_code <= 0x01.
+	bool sanityCheck() const;
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete get LiDAR return mode command frame for Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 15 bytes in total (9 header + 2 command + 4 footer). Carries no
+// payload between the command and the footer.
+struct GetLiDARReturnMode
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	Footer footer;			// 11-14: Protocol frame footer
+
+	// The constructor sets cmd_set 0x01 / cmd_id 0x07.
+	GetLiDARReturnMode();
+	// CRC32 over the whole frame except footer.crc_32.
+	uint32_t calculateCrc32() const;
+	// On big-endian hosts swaps header, command and footer.
+	void swapContentsToProtocolEndianness();
+} __attribute__((packed));
+
+/**		EXPLANATION:
+ * Complete get LiDAR return mode response frame from Livox devices.
+ * This is the complete wire format including header, command fields, data, and footer.
+ */
+// Packed; 17 bytes in total (9 header + 2 command + 1 + 1 + 4 footer).
+struct GetLiDARReturnModeResponse
+{
+	Header header;			// 0-8: Protocol frame header
+	Command command;		// 9-10: Command identification
+	uint8_t ret_code;		// 11: Return Code (0x00 = Success, 0x01 = Fail)
+	uint8_t mode;			// 12: Return Mode (0x00: Single Return First, 0x01: Single Return Strongest, 0x02: Dual Return, 0x03: Triple Return)
+	Footer footer;			// 13-16: Protocol frame footer
+
+	// On big-endian hosts swaps header and command; the footer is left
+	// untouched.
+	void swapContentsToHostEndianness();
+	// Also requires cmd_set == 0x01, cmd_id == 0x07, ret_code <= 0x01
+	// and mode <= 0x03.
+	bool sanityCheck() const;
+	bool validateCrc32() const;
+} __attribute__((packed));
+
+} // namespace comms
+} // namespace livoxProto1
+
+#endif // LIVOXPROTO1_PROTOCOL_H
@@ -0,0 +1,347 @@
+#include <iostream>
+#include <cstring>
+#include <functional>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "udpCommandDemuxer.h"
+#include "core.h"
+#include "device.h"
+
+namespace livoxProto1 {
+namespace comms {
+
+UdpCommandDemuxer::UdpCommandDemuxer(
+	const std::shared_ptr<smo::ComponentThread> &componentThread,
+	DeviceManager &deviceManager,
+	uint16_t commandPort,
+	uint16_t dataPort
+)
+: componentThread(componentThread),
+deviceManager(deviceManager),
+commandPort(commandPort),
+dataPort(dataPort),
+senderAddrLen(sizeof(senderAddr))
+{
+	// Only stores the configuration. No socket is opened here; that
+	// happens in start().
+}
+
+UdpCommandDemuxer::~UdpCommandDemuxer()
+{
+	// Shut down on destruction. stop() returns early when the demuxer
+	// is not active.
+	this->stop();
+}
+
+/* Opens both UDP sockets and arms the first asynchronous read on the
+ * command socket. Does nothing except log if the demuxer is already
+ * active. Rethrows any std::exception raised during setup after logging it.
+ */
+void UdpCommandDemuxer::start()
+{
+	if (isActive.load())
+	{
+		std::cerr << __func__ << ": Demuxer is already running"
+			<< std::endl;
+		return;
+	}
+
+	try
+	{
+		{
+			smo::SpinLock::Guard lock(isActiveAndShouldStopLock);
+
+			setupSockets();
+			isActive.store(true);
+			shouldStop.store(false);
+		}
+
+		// Start the async receive loop
+		startAsyncReceive();
+
+		std::cout
+			<< __func__ << ": UDP Command Demuxer started on port "
+			<< commandPort << std::endl;
+	}
+	catch (const std::exception &e)
+	{
+		std::cerr
+			<< __func__ << ": Failed to start demuxer: "
+			<< e.what() << std::endl;
+		isActive.store(false);
+
+		// Drop any descriptor that was created before the failure. If
+		// the command socket stayed bound after the data socket failed,
+		// the next start() attempt would fail to bind the same port.
+		cmdEndpointFdDesc.reset();
+		pcloudDataFdDesc.reset();
+		throw;
+	}
+}
+
+/* Stops the demuxer: flags the receive loop to stop, cancels pending
+ * waits on both descriptors, releases them and marks the demuxer
+ * inactive. Returns immediately if the demuxer is not active.
+ */
+void UdpCommandDemuxer::stop()
+{
+	{
+		// The active check and the stop request happen under the same
+		// lock that onDataReady() takes before reading.
+		smo::SpinLock::Guard lock(isActiveAndShouldStopLock);
+		if (!isActive.load())
+			{ return; }
+
+		shouldStop.store(true);
+	}
+
+	// Close socket and cleanup
+	// NOTE(review): the descriptors are cancelled and reset after the lock
+	// has been released, while onDataReady() uses cmdEndpointFdDesc under
+	// the lock. Confirm stop() cannot race with a handler on another thread.
+	if (cmdEndpointFdDesc)
+	{
+		cmdEndpointFdDesc->cancel();
+		cmdEndpointFdDesc.reset();
+	}
+
+	if (pcloudDataFdDesc)
+	{
+		pcloudDataFdDesc->cancel();
+		pcloudDataFdDesc.reset();
+	}
+
+	isActive.store(false);
+	std::cout
+		<< __func__ << ": UDP Command Demuxer stopped"
+		<< std::endl;
+}
+
+void UdpCommandDemuxer::setupSockets()
+{
+	// Command socket first, then the point cloud data socket. Either
+	// call may throw.
+	this->setupCommandSocket();
+	this->setupPcloudDataSocket();
+}
+
+namespace {
+
+/* Owns a raw socket file descriptor and closes it on scope exit unless
+ * commit() has handed ownership elsewhere.
+ */
+struct SocketRAII
+{
+	int fd;
+	explicit SocketRAII(int socketFd) : fd(socketFd) {}
+	~SocketRAII() { if (fd >= 0) close(fd); }
+	SocketRAII(const SocketRAII &) = delete;
+	SocketRAII &operator=(const SocketRAII &) = delete;
+	void commit() { fd = -1; } // Transfer ownership, prevent close
+	int getFd() const { return fd; }
+	bool isValid() const { return fd >= 0; }
+};
+
+/* Creates a non-blocking IPv4 UDP socket bound to INADDR_ANY on `port`
+ * and returns its file descriptor; the caller owns it from then on.
+ *
+ * `caller` prefixes every error message and `bindFailureText` is the
+ * text placed between the caller name and the port number when bind()
+ * fails.
+ *
+ * Throws std::runtime_error on any failure; the socket is closed first.
+ */
+int openBoundUdpSocket(
+	const char *caller, uint16_t port, const char *bindFailureText)
+{
+	SocketRAII socketGuard(socket(AF_INET, SOCK_DGRAM, 0));
+	if (!socketGuard.isValid())
+	{
+		// Capture errno before building the message: string allocation
+		// is allowed to modify it.
+		const int savedErrno = errno;
+		throw std::runtime_error(
+			std::string(caller)
+			+ ": Failed to create socket: " + strerror(savedErrno));
+	}
+
+	// Set socket to non-blocking mode
+	int flags = fcntl(socketGuard.getFd(), F_GETFL, 0);
+	if (flags < 0 || fcntl(
+		socketGuard.getFd(), F_SETFL, flags | O_NONBLOCK) < 0)
+	{
+		const int savedErrno = errno;
+		throw std::runtime_error(
+			std::string(caller)
+			+ ": Failed to set non-blocking mode: "
+			+ strerror(savedErrno));
+	}
+
+	// Bind to the requested port on all local interfaces
+	struct sockaddr_in localAddr;
+	memset(&localAddr, 0, sizeof(localAddr));
+	localAddr.sin_family = AF_INET;
+	localAddr.sin_addr.s_addr = INADDR_ANY;
+	localAddr.sin_port = htons(port);
+
+	if (bind(
+		socketGuard.getFd(), (struct sockaddr *)&localAddr,
+		sizeof(localAddr)) < 0)
+	{
+		const int savedErrno = errno;
+		throw std::runtime_error(
+			std::string(caller) + bindFailureText
+			+ std::to_string(port) + ": " + strerror(savedErrno));
+	}
+
+	// Success: hand the descriptor to the caller instead of closing it
+	const int boundFd = socketGuard.getFd();
+	socketGuard.commit();
+	return boundFd;
+}
+
+} // namespace
+
+/* Opens the command socket on commandPort and wraps it in a boost
+ * stream_descriptor stored in cmdEndpointFdDesc.
+ * Throws std::runtime_error if the socket cannot be created, made
+ * non-blocking or bound.
+ */
+void UdpCommandDemuxer::setupCommandSocket()
+{
+	SocketRAII socketGuard(openBoundUdpSocket(
+		__func__, commandPort, ": Failed to bind to port "));
+
+	// Create boost wrapper for async operations
+	cmdEndpointFdDesc = std::make_shared<boost::asio::posix::stream_descriptor>(
+		componentThread->getIoService(), socketGuard.getFd());
+
+	// Transfer ownership, prevent auto-close
+	socketGuard.commit();
+}
+
+/* Opens the point cloud data socket on dataPort and wraps it in a boost
+ * stream_descriptor stored in pcloudDataFdDesc.
+ * Throws std::runtime_error if the socket cannot be created, made
+ * non-blocking or bound.
+ */
+void UdpCommandDemuxer::setupPcloudDataSocket()
+{
+	SocketRAII socketGuard(openBoundUdpSocket(
+		__func__, dataPort, ": Failed to bind to data port: "));
+
+	// Create boost wrapper for async operations
+	pcloudDataFdDesc = std::make_shared<boost::asio::posix::stream_descriptor>(
+		componentThread->getIoService(), socketGuard.getFd());
+
+	// Transfer ownership, prevent auto-close
+	socketGuard.commit();
+}
+
+void UdpCommandDemuxer::startAsyncReceive()
+{
+	// Arm a wait only while the demuxer is active and not stopping.
+	const bool keepListening = isActive.load() && !shouldStop.load();
+	if (!keepListening)
+		{ return; }
+
+	// Ask to be notified once the command socket becomes readable; the
+	// datagram itself is read in onDataReady().
+	cmdEndpointFdDesc->async_wait(
+		boost::asio::posix::stream_descriptor::wait_read,
+		[this](const boost::system::error_code &error)
+		{
+			onDataReady(error);
+		});
+}
+
+/* Completion handler for the readiness wait armed by startAsyncReceive().
+ * Reads one datagram from the command socket, dispatches it through
+ * processIncomingData() and re-arms the wait.
+ *
+ * error: result of the wait. Any error ends the receive loop; every
+ * error except operation_aborted is logged.
+ */
+void UdpCommandDemuxer::onDataReady(const boost::system::error_code &error)
+{
+	if (error)
+	{
+		if (error != boost::asio::error::operation_aborted)
+		{
+			std::cerr
+				<< __func__ << ": Socket error: "
+				<< error.message() << std::endl;
+		}
+		return;
+	}
+
+	smo::SpinLock::Guard lock(isActiveAndShouldStopLock);
+
+	if (!isActive.load() || shouldStop.load())
+		{ return; }
+
+	// recvfrom() treats the address length as a value-result argument:
+	// it must hold the buffer size on entry and is overwritten with the
+	// actual address size. Reset it before every call.
+	senderAddrLen = sizeof(senderAddr);
+
+	// Read the data
+	bytesReceived = recvfrom(
+		cmdEndpointFdDesc->native_handle(), receiveBuffer,
+		sizeof(receiveBuffer), 0,
+		(struct sockaddr *)&senderAddr, &senderAddrLen);
+
+	if (bytesReceived > 0) {
+		processIncomingData();
+	}
+	else if (bytesReceived < 0)
+	{
+		// Capture errno before any stream output can modify it.
+		const int recvErrno = errno;
+		if (recvErrno != EAGAIN && recvErrno != EWOULDBLOCK)
+		{
+			std::cerr << __func__ << ": recvfrom error: "
+				<< strerror(recvErrno) << std::endl;
+		}
+	}
+
+	// Continue listening for more data
+	startAsyncReceive();
+}
+
+/* Routes the datagram held in receiveBuffer to its owner, chosen by the
+ * sender's IPv4 address:
+ *   1. a device in deviceManager.devices whose discovered IP matches, or
+ *   2. the handlers registered for that IP in
+ *      Device::devicesUnderConstruction whose cmd_set/cmd_id match the
+ *      datagram.
+ * A datagram that matches neither is logged and discarded. Exceptions
+ * thrown by handlers are caught and logged.
+ */
+void UdpCommandDemuxer::processIncomingData()
+{
+	if (bytesReceived < 2)
+	{
+		// Too small to contain any meaningful data
+		return;
+	}
+
+	// Extract source IP address. If the conversion fails the buffer is
+	// left unset, so it must not be used for routing.
+	char sourceIP[INET_ADDRSTRLEN];
+	if (inet_ntop(
+		AF_INET, &senderAddr.sin_addr, sourceIP,
+		INET_ADDRSTRLEN) == nullptr)
+	{
+		const int convErrno = errno;
+		std::cerr
+			<< __func__ << ": Failed to convert source address: "
+			<< strerror(convErrno) << std::endl;
+		return;
+	}
+
+	// First, find device with matching IP address in DeviceManager collection
+	for (const auto &device : deviceManager.devices)
+	{
+		if (device->discoveredDevice.ipAddr != sourceIP) { continue; }
+
+		// Found matching device, route the datagram to it
+		try
+		{
+			device->handleUdpDgram(
+				receiveBuffer, bytesReceived, senderAddr);
+		}
+		catch (const std::exception &e)
+		{
+			std::cerr
+				<< __func__ << ": Device handler exception for IP "
+				<< sourceIP << ": " << e.what() << std::endl;
+		}
+		return;
+	}
+
+	// If not found in DeviceManager, check temporary collection (devices under construction)
+	auto tempIt = livoxProto1::Device::devicesUnderConstruction.find(sourceIP);
+	if (tempIt != livoxProto1::Device::devicesUnderConstruction.end())
+	{
+		// The command set and command ID sit directly after the frame
+		// header, so the datagram must hold at least header + command.
+		if (bytesReceived >= static_cast<ssize_t>(
+			sizeof(livoxProto1::comms::Header)
+			+ sizeof(livoxProto1::comms::Command)))
+		{
+			uint8_t cmd_set = receiveBuffer[
+				sizeof(livoxProto1::comms::Header)];
+			uint8_t cmd_id = receiveBuffer[
+				sizeof(livoxProto1::comms::Header) + 1];
+
+			// Found matching dev in temp collection, invoke matching handlers
+			for (const auto& cmdHandler : tempIt->second)
+			{
+				if (cmdHandler.cmd_set != cmd_set
+					|| cmdHandler.cmd_id != cmd_id)
+				{
+					continue;
+				}
+
+				try
+				{
+					cmdHandler.handler(
+						receiveBuffer, bytesReceived, senderAddr);
+				}
+				catch (const std::exception &e)
+				{
+					std::cerr << __func__ << ": Temporary device handler "
+						"exception for IP " << sourceIP << ": " << e.what()
+						<< std::endl;
+				}
+			}
+		}
+		return;
+	}
+
+	// No device found with matching IP in either collection, discard the data
+	std::cerr
+		<< __func__ << ": No device found for source IP "
+		<< sourceIP << ", discarding datagram" << std::endl;
+}
+
+} // namespace comms
+} // namespace livoxProto1
@@ -0,0 +1,99 @@
+#ifndef UDP_COMMAND_DEMUXER_H
+#define UDP_COMMAND_DEMUXER_H
+
+#include <boostAsioLinkageFix.h>
+#include <atomic>
+#include <memory>
+#include <boost/asio/posix/stream_descriptor.hpp>
+#include <componentThread.h>
+#include <spinLock.h>
+
+namespace livoxProto1 {
+
+// Forward declarations
+class DeviceManager;
+
+namespace comms {
+
+/**
+ * UdpCommandDemuxer - Routes UDP command datagrams to appropriate devices
+ *
+ * This class listens on the command port (default 56001) for incoming UDP datagrams
+ * from Livox devices and routes them to the appropriate Device based on
+ * the source IP address.
+ *
+ * The reason we need a whole class for this is because we use the same port
+ * numbers for all connected devices, so we have no way to distinguish between
+ * devices except based on the devices' IP addrs. Since all commands are sent
+ * over UDP, our sockets don't have built-in binding to a specific source IP.
+ *
+ * So we need to discriminate between source IPs manually, and demultiplex
+ * the dgrams received from different devices manually.
+ *
+ * We will probably also have to do the same thing for point cloud and IMU
+ * data, so this class will probably end up renamed to UdpResponseDemuxer.
+ */
+class UdpCommandDemuxer
+{
+public:
+	// Stores the configuration only; sockets are opened by start().
+	// The port defaults are 56001 (command) and 56000 (data).
+	UdpCommandDemuxer(
+		const std::shared_ptr<smo::ComponentThread>& componentThread,
+		DeviceManager& deviceManager,
+		uint16_t commandPort = 56001,
+		uint16_t dataPort = 56000);
+
+	// Calls stop().
+	~UdpCommandDemuxer();
+
+	// start() opens both sockets and begins the async receive loop on
+	// the command socket; it rethrows setup failures. stop() cancels
+	// and releases both descriptors.
+	void start();
+	void stop();
+	bool isRunning() const { return isActive.load(); }
+
+	// Get shared pointer to command endpoint for handshake use
+	// (empty until start() has created the socket)
+	std::shared_ptr<boost::asio::posix::stream_descriptor>
+	getCmdEndpointFdDesc() const
+	{
+		return cmdEndpointFdDesc;
+	}
+
+	// Get shared pointer to pcloud data fd for use in IoUringAssemblyEngine
+	// (empty until start() has created the socket)
+	std::shared_ptr<boost::asio::posix::stream_descriptor>
+	getPcloudDataFdDesc() const
+	{
+		return pcloudDataFdDesc;
+	}
+
+private:
+	// Descriptor wrapping the point cloud data socket (bound to dataPort)
+	std::shared_ptr<boost::asio::posix::stream_descriptor> pcloudDataFdDesc;
+	// Descriptor wrapping the command socket (bound to commandPort)
+	std::shared_ptr<boost::asio::posix::stream_descriptor> cmdEndpointFdDesc;
+
+private:
+	void setupSockets();
+	void setupCommandSocket();
+	void setupPcloudDataSocket();
+	void startAsyncReceive();
+	void onDataReady(const boost::system::error_code& error);
+	void processIncomingData();
+
+	std::shared_ptr<smo::ComponentThread> componentThread;
+	DeviceManager& deviceManager;
+	uint16_t commandPort;
+	uint16_t dataPort;
+
+	// State management
+	// The lock is held while start() sets up, while stop() requests a
+	// stop, and while onDataReady() reads and dispatches a datagram.
+	smo::SpinLock isActiveAndShouldStopLock;
+	std::atomic<bool> isActive{false};
+	std::atomic<bool> shouldStop{false};
+
+	// Receive buffer
+	// One datagram at a time: filled by recvfrom() in onDataReady() and
+	// consumed by processIncomingData().
+	uint8_t receiveBuffer[1024];
+	struct sockaddr_in senderAddr;
+	socklen_t senderAddrLen;
+	ssize_t bytesReceived;
+};
+
+} // namespace comms
+} // namespace livoxProto1
+
+#endif // UDP_COMMAND_DEMUXER_H
@@ -13,7 +13,7 @@ if(ENABLE_LIB_xcbXorg)
    # Set config define for header generation
    add_compile_definitions(CONFIG_LIB_XCBXORG_ENABLED)
    target_include_directories(xcbXorg PUBLIC ${XCB_INCLUDE_DIRS})
-    target_link_libraries(xcbXorg ${XCB_LIBRARIES})
+    target_link_libraries(xcbXorg ${XCB_LIBRARIES} attachmentSupport)

    # Install rules
    install(TARGETS xcbXorg DESTINATION lib)
@@ -0,0 +1,57 @@
+# Optional OpenCL diagnostic programs. Off by default; enable with
+# -DCOMPILE_CL_CHECKS=ON.
+option(COMPILE_CL_CHECKS "Compile CL checks" OFF)
+
+if(COMPILE_CL_CHECKS)
+	# Find OpenCL: try find_package first, fall back to pkg-config
+	find_package(OpenCL QUIET)
+	if(OpenCL_FOUND)
+		# Normalize find_package variables to match pkg_check_modules naming
+		set(OPENCL_FOUND TRUE)
+		set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS})
+		# Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural)
+		if(OpenCL_LIBRARIES)
+			set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
+		else()
+			set(OPENCL_LIBRARIES ${OpenCL_LIBRARY})
+		endif()
+		set(OPENCL_LIBRARY_DIRS "")
+		message(STATUS "Found OpenCL using find_package")
+	else()
+		# Fall back to pkg-config. pkg_check_modules() is only defined
+		# once the PkgConfig module has been loaded, so load it here
+		# rather than relying on another file having done so.
+		find_package(PkgConfig REQUIRED)
+		pkg_check_modules(OPENCL OpenCL)
+		if(NOT OPENCL_FOUND)
+			message(FATAL_ERROR
+				"Failed to find OpenCL: both find_package and "
+				"pkg_check_modules failed. Try installing the "
+				"'ocl-icd-opencl-dev' package (or the appropriate "
+				"OpenCL development package for your system)."
+			)
+		endif()
+		message(STATUS "Found OpenCL using pkg-config")
+	endif()
+
+	# Every check is a single-source executable named after its .cpp
+	# file and built with the same OpenCL include and link settings.
+	set(CL_CHECK_PROGRAMS
+		clhostshmemptrcheck
+		clshmemlatency
+		clshmemlatency_callback
+		clshmemcheck
+		clzerocopycheck
+	)
+	foreach(clCheck IN LISTS CL_CHECK_PROGRAMS)
+		add_executable(${clCheck} ${clCheck}.cpp)
+		target_include_directories(${clCheck}
+			PUBLIC ${OPENCL_INCLUDE_DIRS})
+		target_link_libraries(${clCheck}
+			${OPENCL_LIBRARIES})
+	endforeach()
+endif()
@@ -0,0 +1,125 @@
+#define CL_TARGET_OPENCL_VERSION 300
+#include <CL/cl.h>
+#include <iostream>
+#include <vector>
+#include <cstring>
+
+// Translate the handful of OpenCL status codes this tool can encounter into
+// their symbolic names. Any code not listed maps to "UNKNOWN_ERROR".
+static const char* clErrorToStr(cl_int err)
+{
+    struct NamedCode {
+        cl_int      code;
+        const char* name;
+    };
+
+    static const NamedCode kKnownCodes[] = {
+        { CL_SUCCESS,                       "CL_SUCCESS" },
+        { CL_INVALID_VALUE,                 "CL_INVALID_VALUE" },
+        { CL_INVALID_CONTEXT,               "CL_INVALID_CONTEXT" },
+        { CL_INVALID_MEM_OBJECT,            "CL_INVALID_MEM_OBJECT" },
+        { CL_OUT_OF_HOST_MEMORY,            "CL_OUT_OF_HOST_MEMORY" },
+        { CL_INVALID_OPERATION,             "CL_INVALID_OPERATION" },
+        { CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
+    };
+
+    for (const NamedCode& entry : kKnownCodes) {
+        if (entry.code == err)
+            return entry.name;
+    }
+    return "UNKNOWN_ERROR";
+}
+
+// Probe one device: create a 1 KiB CL_MEM_USE_HOST_PTR buffer in `ctx`, then
+// push a blocking write through a fresh command queue on `dev`.
+// Returns true only when buffer creation, queue creation and the write all
+// report CL_SUCCESS; each failure is described on stderr.
+bool testUseHostPtr(cl_context ctx, cl_device_id dev)
+{
+    const size_t probeBytes = 1024;
+    std::vector<char> hostMem(probeBytes, 0);
+    cl_int status = 0;
+
+    cl_mem probe = clCreateBuffer(ctx,
+                                  CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  probeBytes,
+                                  hostMem.data(),
+                                  &status);
+    if (status != CL_SUCCESS) {
+        std::cerr << "  clCreateBuffer(CL_MEM_USE_HOST_PTR) failed: "
+                  << clErrorToStr(status) << "\n";
+        return false;
+    }
+
+    // A default (in-order, non-profiling) queue is enough for the probe.
+    cl_queue_properties defaultProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
+    cl_command_queue queue =
+        clCreateCommandQueueWithProperties(ctx, dev, defaultProps, &status);
+    if (status != CL_SUCCESS) {
+        std::cerr << "  Failed to create command queue: "
+                  << clErrorToStr(status) << "\n";
+        clReleaseMemObject(probe);
+        return false;
+    }
+
+    // Trivial blocking write to confirm the buffer is actually usable.
+    status = clEnqueueWriteBuffer(queue, probe, CL_TRUE, 0, probeBytes,
+                                  hostMem.data(), 0, nullptr, nullptr);
+    clFinish(queue);
+
+    const bool writeSucceeded = (status == CL_SUCCESS);
+    if (!writeSucceeded) {
+        std::cerr << "  clEnqueueWriteBuffer failed: " << clErrorToStr(status) << "\n";
+    }
+
+    clReleaseCommandQueue(queue);
+    clReleaseMemObject(probe);
+    return writeSucceeded;
+}
+
+// Enumerate every OpenCL platform and device, and report for each device
+// whether a CL_MEM_USE_HOST_PTR buffer can be created and written
+// (see testUseHostPtr). Returns 0 unless platform enumeration itself fails.
+int main()
+{
+    cl_uint numPlatforms = 0;
+    cl_int err = clGetPlatformIDs(0, nullptr, &numPlatforms);
+
+    // A failing count query (e.g. no ICD installed) is treated like
+    // "zero platforms": there is simply nothing to probe.
+    if(err != CL_SUCCESS || numPlatforms == 0){
+        std::cout << "No OpenCL platforms.\n";
+        return 0;
+    }
+
+    std::vector<cl_platform_id> plats(numPlatforms);
+    err = clGetPlatformIDs(numPlatforms, plats.data(), nullptr);
+    if(err != CL_SUCCESS) {
+        std::cerr << "clGetPlatformIDs failed: " << clErrorToStr(err) << "\n";
+        return 1;
+    }
+
+    for(cl_uint p = 0; p < numPlatforms; ++p)
+    {
+        // Zero-initialised so a failed info query prints an empty name
+        // instead of uninitialised stack contents.
+        char buf[256] = {0};
+
+        clGetPlatformInfo(plats[p], CL_PLATFORM_NAME, sizeof(buf), buf, nullptr);
+        std::cout << "Platform: " << buf << "\n";
+
+        cl_uint numDevs = 0;
+        clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevs);
+
+        if(numDevs == 0) {
+            std::cout << "  No devices found on this platform.\n";
+            continue;
+        }
+
+        std::vector<cl_device_id> devs(numDevs);
+        err = clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, numDevs, devs.data(), nullptr);
+        if(err != CL_SUCCESS) {
+            std::cout << "  Failed to list devices: " << clErrorToStr(err) << "\n";
+            continue;
+        }
+
+        for(cl_uint d = 0; d < numDevs; ++d)
+        {
+            // Reset so a failed query cannot re-print the previous name.
+            buf[0] = '\0';
+            clGetDeviceInfo(devs[d], CL_DEVICE_NAME, sizeof(buf), buf, nullptr);
+            std::cout << "  Device: " << buf << "\n";
+
+            // One single-device context per probe keeps devices independent.
+            err = CL_SUCCESS;
+            cl_context ctx = clCreateContext(nullptr, 1, &devs[d], nullptr, nullptr, &err);
+
+            if(err != CL_SUCCESS) {
+                std::cout << "    Failed to create context: "
+                          << clErrorToStr(err) << "\n";
+                continue;
+            }
+
+            bool supported = testUseHostPtr(ctx, devs[d]);
+
+            if(supported)
+                std::cout << "    HOST_PTR appears supported.\n";
+            else
+                std::cout << "    HOST_PTR appears NOT supported.\n";
+
+            clReleaseContext(ctx);
+        }
+    }
+
+    return 0;
+}
@@ -0,0 +1,94 @@
+#define CL_TARGET_OPENCL_VERSION 300
+#include <CL/cl.h>
+#include <iostream>
+#include <vector>
+#include <chrono>
+#include <cstring>
+#include <cstdlib>
+
+// Terminate the process (exit code 1) with a message on stderr when `err` is
+// anything other than CL_SUCCESS; `msg` names the step that was attempted.
+void checkCLError(cl_int err, const char* msg) {
+    if (err == CL_SUCCESS)
+        return;
+
+    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
+    exit(1);
+}
+
+// For every device on every platform: print the unified-memory flag and SVM
+// capabilities, then time a blocking map/unmap of a 1 MiB
+// CL_MEM_USE_HOST_PTR buffer as a rough zero-copy indicator.
+// Any OpenCL failure in the timed section aborts via checkCLError.
+int main() {
+    cl_uint numPlatforms = 0;
+    checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
+    std::vector<cl_platform_id> platforms(numPlatforms);
+    checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
+
+    std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
+
+    for (cl_uint p = 0; p < numPlatforms; ++p) {
+        // Zero-initialised so a failed query prints an empty name, not garbage.
+        char platformName[256] = {0};
+        clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
+        std::cout << "Platform " << p << ": " << platformName << "\n";
+
+        cl_uint numDevices = 0;
+        clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
+        std::vector<cl_device_id> devices(numDevices);
+        if (numDevices > 0)
+            clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
+
+        for (cl_uint d = 0; d < numDevices; ++d) {
+            char deviceName[256] = {0};
+            clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
+            std::cout << "  Device " << d << ": " << deviceName << "\n";
+
+            cl_bool unifiedMem = CL_FALSE;
+            clGetDeviceInfo(devices[d], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unifiedMem), &unifiedMem, nullptr);
+            std::cout << "    Host-Device unified memory: " << (unifiedMem ? "Yes" : "No") << "\n";
+
+#ifdef CL_DEVICE_SVM_CAPABILITIES
+            cl_device_svm_capabilities svmCaps = 0;
+            clGetDeviceInfo(devices[d], CL_DEVICE_SVM_CAPABILITIES, sizeof(svmCaps), &svmCaps, nullptr);
+            std::cout << "    SVM capabilities:\n";
+            if (!svmCaps) std::cout << "      None\n";
+            if (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER)
+                std::cout << "      - Coarse-grain buffer sharing\n";
+            if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)
+                std::cout << "      - Fine-grain buffer sharing\n";
+            if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)
+                std::cout << "      - Fine-grain system sharing\n";
+            if (svmCaps & CL_DEVICE_SVM_ATOMICS)
+                std::cout << "      - Atomics supported\n";
+#endif
+
+            // Runtime probe: wrap host memory in a USE_HOST_PTR buffer and
+            // time how long a map/unmap round trip takes.
+            const size_t bufSize = 1024 * 1024;
+            std::vector<char> hostBuffer(bufSize, 42);
+
+            cl_int err = CL_SUCCESS;
+            cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
+            checkCLError(err, "create context");
+
+            cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
+            checkCLError(err, "create buffer");
+
+            cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
+            cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
+            checkCLError(err, "create queue");
+
+            cl_event evt = nullptr;
+
+            auto start = std::chrono::high_resolution_clock::now();
+
+            void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
+            checkCLError(err, "map buffer");
+            clWaitForEvents(1, &evt);
+
+            err = clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr);
+            clReleaseMemObject(buf);
+            auto end = std::chrono::high_resolution_clock::now();
+
+            // Checked outside the timed window so the measurement is unchanged.
+            checkCLError(err, "unmap buffer");
+            // The enqueue handed us a reference to the event; drop it or it leaks.
+            clReleaseEvent(evt);
+
+            std::chrono::duration<double, std::milli> elapsed = end - start;
+            std::cout << "    Map latency: " << elapsed.count() << " ms (lower → likely zero-copy)\n";
+
+            clReleaseCommandQueue(q);
+            clReleaseContext(ctx);
+        }
+        std::cout << std::endl;
+    }
+    return 0;
+}
@@ -0,0 +1,184 @@
+#define CL_TARGET_OPENCL_VERSION 300
+#include <CL/cl.h>
+#include <iostream>
+#include <vector>
+#include <chrono>
+#include <cstring>
+#include <cstdlib>
+
+// Fail-fast helper: when `err` is not CL_SUCCESS, report the numeric code and
+// the step description `msg` on stderr, then exit with status 1.
+void checkCLError(cl_int err, const char* msg) {
+    const bool failed = (err != CL_SUCCESS);
+    if (!failed) {
+        return;
+    }
+    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
+    exit(1);
+}
+
+// --------------------
+// OpenCL C source for the benchmark kernel `xyz_i_split`.
+//
+// One work-item handles one 16-byte input record from `assembly`: the first
+// 12 bytes are copied to `xyzOut` at gid*12 and the remaining 4 bytes to
+// `iOut` at gid*4. Work-items whose id is >= `numPoints` return immediately.
+// `numPoints` is declared `uint` in the kernel, so the host must supply a
+// 32-bit value for argument index 3.
+const char* kernelSrc = R"CLC(
+__kernel void xyz_i_split(__global uchar* assembly,
+                          __global uchar* xyzOut,
+                          __global uchar* iOut,
+                          const uint numPoints) {
+    uint gid = get_global_id(0);
+    if (gid >= numPoints) return;
+
+    uint offset = gid * 16;
+    // Copy XYZ (12 bytes) to xyzOut
+    for (int i=0; i<12; ++i)
+        xyzOut[gid*12 + i] = assembly[offset + i];
+
+    // Copy Intensity (4 bytes) to iOut
+    for (int i=0; i<4; ++i)
+        iOut[gid*4 + i] = assembly[offset + 12 + i];
+}
+)CLC";
+
+// Benchmark driver: for every device on every platform, build the
+// xyz_i_split kernel, wrap three host vectors in CL_MEM_USE_HOST_PTR buffers
+// and run 10 iterations of map -> kernel -> unmap, printing the wall-clock
+// time of each stage next to the time of the same split done on the CPU.
+// Any OpenCL failure aborts via checkCLError.
+int main() {
+    // --------------------
+    // CHANGE THIS VALUE to set number of points per assembly buffer
+    const size_t numPointsPerAssembly = 100000;
+    const size_t bytesPerPoint = 16;          // 12 bytes XYZ + 4 bytes I
+
+    const size_t assemblyBufSize = numPointsPerAssembly * bytesPerPoint;
+    const size_t xyzBufSize = numPointsPerAssembly * 12;
+    const size_t iBufSize = numPointsPerAssembly * 4;
+
+    // The kernel parameter is a 32-bit `uint`. Passing the address of a
+    // size_t together with sizeof(cl_uint) copies the wrong half of the value
+    // on big-endian hosts, so hand over a genuine cl_uint instead.
+    const cl_uint numPointsArg = static_cast<cl_uint>(numPointsPerAssembly);
+
+    cl_uint numPlatforms = 0;
+    checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
+    std::vector<cl_platform_id> platforms(numPlatforms);
+    checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
+
+    std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
+
+    for (cl_uint p = 0; p < numPlatforms; ++p) {
+        // Zero-initialised so a failed query prints an empty name, not garbage.
+        char platformName[256] = {0};
+        clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
+        std::cout << "Platform " << p << ": " << platformName << "\n";
+
+        cl_uint numDevices = 0;
+        clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
+        std::vector<cl_device_id> devices(numDevices);
+        if (numDevices > 0)
+            clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
+
+        for (cl_uint d = 0; d < numDevices; ++d) {
+            char deviceName[256] = {0};
+            clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
+            std::cout << "  Device " << d << ": " << deviceName << "\n";
+
+            cl_int err = CL_SUCCESS;
+            cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
+            checkCLError(err, "create context");
+
+            cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
+            cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
+            checkCLError(err, "create queue");
+
+            // --------------------
+            // Host memory backing the CL buffers (input filled with 42).
+            std::vector<unsigned char> assemblyHost(assemblyBufSize, 42);
+            std::vector<unsigned char> xyzHost(xyzBufSize, 0);
+            std::vector<unsigned char> iHost(iBufSize, 0);
+
+            // Separate outputs for the CPU reference split.
+            std::vector<unsigned char> xyzHostCPU(xyzBufSize, 0);
+            std::vector<unsigned char> iHostCPU(iBufSize, 0);
+
+            // Create CL buffers on top of the host vectors
+            cl_mem assemblyBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, assemblyBufSize, assemblyHost.data(), &err);
+            checkCLError(err, "create assembly buffer");
+            cl_mem xyzBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, xyzBufSize, xyzHost.data(), &err);
+            checkCLError(err, "create xyz buffer");
+            cl_mem iBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, iBufSize, iHost.data(), &err);
+            checkCLError(err, "create i buffer");
+
+            // Build program
+            cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, nullptr, &err);
+            checkCLError(err, "create program");
+
+            err = clBuildProgram(prog, 1, &devices[d], nullptr, nullptr, nullptr);
+            if (err != CL_SUCCESS) {
+                // Print build log. One extra zeroed byte guarantees a valid,
+                // NUL-terminated string even when the reported size is 0.
+                size_t logSize = 0;
+                clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
+                std::vector<char> log(logSize + 1, '\0');
+                clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
+                std::cerr << log.data() << "\n";
+            }
+            checkCLError(err, "build program");
+
+            cl_kernel kernel = clCreateKernel(prog, "xyz_i_split", &err);
+            checkCLError(err, "create kernel");
+
+            // Set kernel args
+            checkCLError(clSetKernelArg(kernel, 0, sizeof(cl_mem), &assemblyBuf), "set kernel arg 0");
+            checkCLError(clSetKernelArg(kernel, 1, sizeof(cl_mem), &xyzBuf), "set kernel arg 1");
+            checkCLError(clSetKernelArg(kernel, 2, sizeof(cl_mem), &iBuf), "set kernel arg 2");
+            checkCLError(clSetKernelArg(kernel, 3, sizeof(cl_uint), &numPointsArg), "set kernel arg 3");
+
+            const size_t globalWorkSize = numPointsPerAssembly;
+
+            // --------------------
+            // Run a few iterations
+            for (int iter = 0; iter < 10; ++iter) {
+                cl_event mapEvt = nullptr;
+                cl_event kernelEvt = nullptr;
+                cl_event unmapEvt = nullptr;
+
+                auto t0 = std::chrono::high_resolution_clock::now();
+
+                void* mappedAssembly = clEnqueueMapBuffer(q, assemblyBuf, CL_TRUE, CL_MAP_READ, 0, assemblyBufSize, 0, nullptr, &mapEvt, &err);
+                checkCLError(err, "map assembly buffer");
+                clWaitForEvents(1, &mapEvt);
+
+                auto t1 = std::chrono::high_resolution_clock::now();
+
+                err = clEnqueueNDRangeKernel(q, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &kernelEvt);
+                checkCLError(err, "enqueue kernel");
+                clWaitForEvents(1, &kernelEvt);
+
+                auto t2 = std::chrono::high_resolution_clock::now();
+
+                err = clEnqueueUnmapMemObject(q, assemblyBuf, mappedAssembly, 0, nullptr, &unmapEvt);
+                checkCLError(err, "unmap assembly buffer");
+                clWaitForEvents(1, &unmapEvt);
+
+                auto t3 = std::chrono::high_resolution_clock::now();
+
+                // Each enqueue returned an owned event reference. Release
+                // them after the last timestamp so the timings are unaffected.
+                clReleaseEvent(mapEvt);
+                clReleaseEvent(kernelEvt);
+                clReleaseEvent(unmapEvt);
+
+                // --------------------
+                // Host CPU split (reference timing for the same work)
+                auto cpuStart = std::chrono::high_resolution_clock::now();
+                for (size_t pt = 0; pt < numPointsPerAssembly; ++pt) {
+                    size_t off = pt * 16;
+                    for (int i = 0; i < 12; ++i)
+                        xyzHostCPU[pt*12 + i] = assemblyHost[off + i];
+                    for (int i = 0; i < 4; ++i)
+                        iHostCPU[pt*4 + i] = assemblyHost[off + 12 + i];
+                }
+                auto cpuEnd = std::chrono::high_resolution_clock::now();
+
+                std::chrono::duration<double, std::milli> mapElapsed = t1 - t0;
+                std::chrono::duration<double, std::milli> kernelElapsed = t2 - t1;
+                std::chrono::duration<double, std::milli> unmapElapsed = t3 - t2;
+                std::chrono::duration<double, std::milli> cpuElapsed = cpuEnd - cpuStart;
+
+                std::cout << "Iteration " << iter
+                          << " | Map: " << mapElapsed.count()
+                          << " ms | Kernel: " << kernelElapsed.count()
+                          << " ms | Unmap: " << unmapElapsed.count()
+                          << " ms | CPU Split: " << cpuElapsed.count() << " ms\n";
+            }
+
+            // Cleanup
+            clReleaseKernel(kernel);
+            clReleaseProgram(prog);
+            clReleaseMemObject(assemblyBuf);
+            clReleaseMemObject(xyzBuf);
+            clReleaseMemObject(iBuf);
+            clReleaseCommandQueue(q);
+            clReleaseContext(ctx);
+        }
+        std::cout << std::endl;
+    }
+    return 0;
+}
+
@@ -0,0 +1,300 @@
+#define CL_TARGET_OPENCL_VERSION 300
+#include <CL/cl.h>
+#include <iostream>
+#include <vector>
+#include <chrono>
+#include <cstring>
+#include <cstdlib>
+#include <mutex>
+#include <condition_variable>
+
+// Abort on OpenCL failure: any `err` other than CL_SUCCESS is printed to
+// stderr together with `msg` (the step being attempted), and the process
+// exits with status 1. A successful status is a no-op.
+void checkCLError(cl_int err, const char* msg) {
+    switch (err) {
+        case CL_SUCCESS:
+            return;
+        default:
+            std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
+            exit(1);
+    }
+}
+
+// State shared between the thread that waits for an OpenCL event and the
+// event callback that signals it. All fields other than `mtx`/`cv` are read
+// and written with `mtx` held.
+struct CallbackContext {
+    std::mutex mtx;                 // guards the fields below
+    std::condition_variable cv;     // notified by the callback once done
+    // Default initialisers make a freshly constructed context safe to wait
+    // on even if the caller forgets to set a field explicitly.
+    bool completed = false;         // set to true by the callback
+    cl_int status = CL_SUCCESS;     // execution status reported to the callback
+    // Optional out-parameter: when non-null the callback stores the
+    // completion time here.
+    std::chrono::high_resolution_clock::time_point* timestamp = nullptr;
+};
+
+// Block the calling thread until the callback owning `ctx` has set
+// `ctx.completed`, then log that the wait finished.
+void waitForCallback(CallbackContext& ctx) {
+    std::unique_lock<std::mutex> guard(ctx.mtx);
+    // Explicit loop form of a predicate wait: tolerates spurious wakeups.
+    while (!ctx.completed) {
+        ctx.cv.wait(guard);
+    }
+    std::cout <<"waitForCallback cv.wait() returned.\n";
+}
+
+// Event callback registered on the map-buffer event.
+// `user_data` must point at the CallbackContext of the waiting thread; the
+// callback records the execution status, optionally stamps the completion
+// time, marks the context completed and wakes the waiter.
+void CL_CALLBACK mapEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
+    CallbackContext* shared = static_cast<CallbackContext*>(user_data);
+    std::cout <<"mapEventCallback called and about to lock mutex.\n";
+
+    // Held until the function returns, i.e. across the notify and final log.
+    std::unique_lock<std::mutex> guard(shared->mtx);
+    shared->status = event_command_exec_status;
+    if (shared->timestamp != nullptr) {
+        *shared->timestamp = std::chrono::high_resolution_clock::now();
+    }
+    shared->completed = true;
+    shared->cv.notify_one();
+    std::cout <<"mapEventCallback just notified.\n";
+}
+
+// Event callback registered on the kernel-execution event.
+// `user_data` must point at the CallbackContext of the waiting thread; the
+// callback records the execution status, optionally stamps the completion
+// time, marks the context completed and wakes the waiter.
+void CL_CALLBACK kernelEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
+    CallbackContext* ctx = static_cast<CallbackContext*>(user_data);
+    // Log lines name this callback (they previously claimed to come from
+    // mapEventCallback, which made the trace output misleading).
+    std::cout <<"kernelEventCallback called and about to lock mutex.\n";
+    std::unique_lock<std::mutex> lock(ctx->mtx);
+    ctx->status = event_command_exec_status;
+    if (ctx->timestamp) {
+        *ctx->timestamp = std::chrono::high_resolution_clock::now();
+    }
+    ctx->completed = true;
+    ctx->cv.notify_one();
+    std::cout <<"kernelEventCallback just notified.\n";
+}
+
+// Event callback registered on the unmap-buffer event.
+// `user_data` must point at the CallbackContext of the waiting thread; the
+// callback records the execution status, optionally stamps the completion
+// time, marks the context completed and wakes the waiter.
+void CL_CALLBACK unmapEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
+    CallbackContext* ctx = static_cast<CallbackContext*>(user_data);
+    // Log lines name this callback (they previously claimed to come from
+    // mapEventCallback, which made the trace output misleading).
+    std::cout <<"unmapEventCallback called and about to lock mutex.\n";
+    std::unique_lock<std::mutex> lock(ctx->mtx);
+    ctx->status = event_command_exec_status;
+    if (ctx->timestamp) {
+        *ctx->timestamp = std::chrono::high_resolution_clock::now();
+    }
+    ctx->completed = true;
+    ctx->cv.notify_one();
+    std::cout <<"unmapEventCallback just notified.\n";
+}
+
+// --------------------
+// OpenCL C source for the benchmark kernel `xyz_i_split`.
+//
+// One work-item handles one 16-byte input record from `assembly`: the first
+// 12 bytes are copied to `xyzOut` at gid*12 and the remaining 4 bytes to
+// `iOut` at gid*4. Work-items whose id is >= `numPoints` return immediately.
+// `numPoints` is declared `uint` in the kernel, so the host must supply a
+// 32-bit value for argument index 3.
+const char* kernelSrc = R"CLC(
+__kernel void xyz_i_split(__global uchar* assembly,
+                          __global uchar* xyzOut,
+                          __global uchar* iOut,
+                          const uint numPoints) {
+    uint gid = get_global_id(0);
+    if (gid >= numPoints) return;
+
+    uint offset = gid * 16;
+    // Copy XYZ (12 bytes) to xyzOut
+    for (int i=0; i<12; ++i)
+        xyzOut[gid*12 + i] = assembly[offset + i];
+
+    // Copy Intensity (4 bytes) to iOut
+    for (int i=0; i<4; ++i)
+        iOut[gid*4 + i] = assembly[offset + 12 + i];
+}
+)CLC";
+
+// Benchmark driver, callback flavour: same map -> kernel -> unmap sequence as
+// the blocking variant, but every stage is enqueued non-blocking and its
+// completion is awaited through clSetEventCallback + a condition variable.
+// The completion timestamps (t1..t3) are taken inside the callbacks.
+// Any OpenCL failure aborts via checkCLError.
+int main() {
+    // --------------------
+    // CHANGE THIS VALUE to set number of points per assembly buffer
+    const size_t numPointsPerAssembly = 100000;
+    const size_t bytesPerPoint = 16;          // 12 bytes XYZ + 4 bytes I
+
+    const size_t assemblyBufSize = numPointsPerAssembly * bytesPerPoint;
+    const size_t xyzBufSize = numPointsPerAssembly * 12;
+    const size_t iBufSize = numPointsPerAssembly * 4;
+
+    // The kernel parameter is a 32-bit `uint`. Passing the address of a
+    // size_t together with sizeof(cl_uint) copies the wrong half of the value
+    // on big-endian hosts, so hand over a genuine cl_uint instead.
+    const cl_uint numPointsArg = static_cast<cl_uint>(numPointsPerAssembly);
+
+    cl_uint numPlatforms = 0;
+    checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
+    std::vector<cl_platform_id> platforms(numPlatforms);
+    checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
+
+    std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
+
+    for (cl_uint p = 0; p < numPlatforms; ++p) {
+        // Zero-initialised so a failed query prints an empty name, not garbage.
+        char platformName[256] = {0};
+        clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
+        std::cout << "Platform " << p << ": " << platformName << "\n";
+
+        cl_uint numDevices = 0;
+        clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
+        std::vector<cl_device_id> devices(numDevices);
+        if (numDevices > 0)
+            clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
+
+        for (cl_uint d = 0; d < numDevices; ++d) {
+            char deviceName[256] = {0};
+            clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
+            std::cout << "  Device " << d << ": " << deviceName << "\n";
+
+            cl_int err = CL_SUCCESS;
+            cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
+            checkCLError(err, "create context");
+
+            cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
+            cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
+            checkCLError(err, "create queue");
+
+            // --------------------
+            // Host memory backing the CL buffers (input filled with 42).
+            std::vector<unsigned char> assemblyHost(assemblyBufSize, 42);
+            std::vector<unsigned char> xyzHost(xyzBufSize, 0);
+            std::vector<unsigned char> iHost(iBufSize, 0);
+
+            // Separate outputs for the CPU reference split.
+            std::vector<unsigned char> xyzHostCPU(xyzBufSize, 0);
+            std::vector<unsigned char> iHostCPU(iBufSize, 0);
+
+            // Create CL buffers on top of the host vectors
+            cl_mem assemblyBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, assemblyBufSize, assemblyHost.data(), &err);
+            checkCLError(err, "create assembly buffer");
+            cl_mem xyzBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, xyzBufSize, xyzHost.data(), &err);
+            checkCLError(err, "create xyz buffer");
+            cl_mem iBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, iBufSize, iHost.data(), &err);
+            checkCLError(err, "create i buffer");
+
+            // Build program
+            cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, nullptr, &err);
+            checkCLError(err, "create program");
+
+            err = clBuildProgram(prog, 1, &devices[d], nullptr, nullptr, nullptr);
+            if (err != CL_SUCCESS) {
+                // Print build log. One extra zeroed byte guarantees a valid,
+                // NUL-terminated string even when the reported size is 0.
+                size_t logSize = 0;
+                clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
+                std::vector<char> log(logSize + 1, '\0');
+                clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
+                std::cerr << log.data() << "\n";
+            }
+            checkCLError(err, "build program");
+
+            cl_kernel kernel = clCreateKernel(prog, "xyz_i_split", &err);
+            checkCLError(err, "create kernel");
+
+            // Set kernel args
+            checkCLError(clSetKernelArg(kernel, 0, sizeof(cl_mem), &assemblyBuf), "set kernel arg 0");
+            checkCLError(clSetKernelArg(kernel, 1, sizeof(cl_mem), &xyzBuf), "set kernel arg 1");
+            checkCLError(clSetKernelArg(kernel, 2, sizeof(cl_mem), &iBuf), "set kernel arg 2");
+            checkCLError(clSetKernelArg(kernel, 3, sizeof(cl_uint), &numPointsArg), "set kernel arg 3");
+
+            const size_t globalWorkSize = numPointsPerAssembly;
+
+            // --------------------
+            // Run a few iterations
+            for (int iter = 0; iter < 10; ++iter) {
+                auto t0 = std::chrono::high_resolution_clock::now();
+                std::chrono::high_resolution_clock::time_point t1, t2, t3;
+
+                // ---- Stage 1: map ----
+                cl_event mapEvt = nullptr;
+                void* mappedAssembly = clEnqueueMapBuffer(q, assemblyBuf, CL_FALSE, CL_MAP_READ, 0, assemblyBufSize, 0, nullptr, &mapEvt, &err);
+                checkCLError(err, "map assembly buffer");
+
+                // The reference returned by the enqueue keeps the event alive
+                // until the clReleaseEvent below. An additional clRetainEvent
+                // would need a second release and otherwise leaks the event.
+                CallbackContext mapCtx;
+                mapCtx.completed = false;
+                mapCtx.status = CL_SUCCESS;
+                mapCtx.timestamp = &t1;
+                err = clSetEventCallback(mapEvt, CL_COMPLETE, mapEventCallback, &mapCtx);
+                checkCLError(err, "set map event callback");
+                // Force queue flush to ensure event processing
+                err = clFlush(q);
+                checkCLError(err, "flush queue");
+                std::cout <<"About to waitForCalllback for clEnqueueMapBuffer.\n";
+                waitForCallback(mapCtx);
+                checkCLError(mapCtx.status, "map buffer");
+
+                err = clReleaseEvent(mapEvt);
+                checkCLError(err, "release map event");
+
+                // ---- Stage 2: kernel ----
+                cl_event kernelEvt = nullptr;
+                err = clEnqueueNDRangeKernel(q, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &kernelEvt);
+                checkCLError(err, "enqueue kernel");
+
+                CallbackContext kernelCtx;
+                kernelCtx.completed = false;
+                kernelCtx.status = CL_SUCCESS;
+                kernelCtx.timestamp = &t2;
+                err = clSetEventCallback(kernelEvt, CL_COMPLETE, kernelEventCallback, &kernelCtx);
+                checkCLError(err, "set kernel event callback");
+                // Force queue flush to ensure event processing
+                err = clFlush(q);
+                checkCLError(err, "flush queue");
+                std::cout <<"About to waitForCalllback for clEnqueueNDRangeKernel.\n";
+                waitForCallback(kernelCtx);
+                checkCLError(kernelCtx.status, "kernel execution");
+
+                err = clReleaseEvent(kernelEvt);
+                checkCLError(err, "release kernel event");
+
+                // ---- Stage 3: unmap ----
+                cl_event unmapEvt = nullptr;
+                err = clEnqueueUnmapMemObject(q, assemblyBuf, mappedAssembly, 0, nullptr, &unmapEvt);
+                checkCLError(err, "unmap assembly buffer");
+
+                CallbackContext unmapCtx;
+                unmapCtx.completed = false;
+                unmapCtx.status = CL_SUCCESS;
+                unmapCtx.timestamp = &t3;
+                err = clSetEventCallback(unmapEvt, CL_COMPLETE, unmapEventCallback, &unmapCtx);
+                checkCLError(err, "set unmap event callback");
+                // Force queue flush to ensure event processing
+                err = clFlush(q);
+                checkCLError(err, "flush queue");
+                std::cout <<"About to waitForCalllback for clEnqueueUnmapMemObject.\n";
+                waitForCallback(unmapCtx);
+                checkCLError(unmapCtx.status, "unmap buffer");
+
+                err = clReleaseEvent(unmapEvt);
+                checkCLError(err, "release unmap event");
+
+                // --------------------
+                // Host CPU split (reference timing for the same work)
+                auto cpuStart = std::chrono::high_resolution_clock::now();
+                for (size_t pt = 0; pt < numPointsPerAssembly; ++pt) {
+                    size_t off = pt * 16;
+                    for (int i = 0; i < 12; ++i)
+                        xyzHostCPU[pt*12 + i] = assemblyHost[off + i];
+                    for (int i = 0; i < 4; ++i)
+                        iHostCPU[pt*4 + i] = assemblyHost[off + 12 + i];
+                }
+                auto cpuEnd = std::chrono::high_resolution_clock::now();
+
+                std::chrono::duration<double, std::milli> mapElapsed = t1 - t0;
+                std::chrono::duration<double, std::milli> kernelElapsed = t2 - t1;
+                std::chrono::duration<double, std::milli> unmapElapsed = t3 - t2;
+                std::chrono::duration<double, std::milli> cpuElapsed = cpuEnd - cpuStart;
+
+                std::cout << "Iteration " << iter
+                          << " | Map: " << mapElapsed.count()
+                          << " ms | Kernel: " << kernelElapsed.count()
+                          << " ms | Unmap: " << unmapElapsed.count()
+                          << " ms | CPU Split: " << cpuElapsed.count() << " ms\n";
+            }
+
+            // Cleanup
+            clReleaseKernel(kernel);
+            clReleaseProgram(prog);
+            clReleaseMemObject(assemblyBuf);
+            clReleaseMemObject(xyzBuf);
+            clReleaseMemObject(iBuf);
+            clReleaseCommandQueue(q);
+            clReleaseContext(ctx);
+        }
+        std::cout << std::endl;
+    }
+    return 0;
+}
+
@@ -0,0 +1,117 @@
+#define CL_TARGET_OPENCL_VERSION 300
+#include <CL/cl.h>
+#include <iostream>
+#include <vector>
+#include <cstring>
+
+#define CHECK(err, msg) \
+    if (err != CL_SUCCESS) { \
+        std::cerr << "ERROR: " << msg << " (" << err << ")\n"; \
+        return 1; \
+    }
+
+// OpenCL C source for the `check_shared` kernel: each work-item reads one int
+// from `in` and stores that value plus 42 into the same index of `out`.
+const char *kernelSrc = R"CLC(
+__kernel void check_shared(__global const int* in, __global int* out) {
+    int gid = get_global_id(0);
+    out[gid] = in[gid] + 42;  // simple deterministic transform
+}
+)CLC";
+
+// Zero-copy smoke test on the first GPU of the first platform:
+//  1. allocate input/output buffers with CL_MEM_ALLOC_HOST_PTR,
+//  2. fill the input through a mapped pointer (no clEnqueueWriteBuffer),
+//  3. run `check_shared` (out = in + 42),
+//  4. map the output and verify the values.
+// Returns 0 when the run completes (the verdict is printed), 1 on any
+// OpenCL error. Early error returns do not release already-created objects;
+// the process exits immediately afterwards.
+int main() {
+    cl_int err = CL_SUCCESS;
+
+    // Pick first available device
+    cl_uint numPlatforms = 0;
+    CHECK(clGetPlatformIDs(0, nullptr, &numPlatforms), "clGetPlatformIDs count");
+    if (numPlatforms == 0) {
+        // Indexing plats[0] below would be out of bounds.
+        std::cerr << "ERROR: no OpenCL platforms available\n";
+        return 1;
+    }
+    std::vector<cl_platform_id> plats(numPlatforms);
+    CHECK(clGetPlatformIDs(numPlatforms, plats.data(), nullptr), "clGetPlatformIDs");
+
+    cl_platform_id plat = plats[0];
+    cl_device_id dev = nullptr;
+    CHECK(clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, nullptr), "clGetDeviceIDs");
+
+    cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
+    CHECK(err, "clCreateContext");
+
+    cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
+    cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
+    CHECK(err, "clCreateCommandQueueWithProperties");
+
+    // Create program and kernel
+    const size_t srcLen = std::strlen(kernelSrc);
+    cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, &srcLen, &err);
+    CHECK(err, "clCreateProgramWithSource");
+
+    err = clBuildProgram(prog, 1, &dev, nullptr, nullptr, nullptr);
+    if (err != CL_SUCCESS) {
+        // One extra zeroed byte guarantees a valid, NUL-terminated string
+        // even when the size query fails or reports 0.
+        size_t logSize = 0;
+        clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
+        std::vector<char> log(logSize + 1, '\0');
+        clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
+        std::cerr << "--- Build Log ---\n" << log.data() << "\n";
+        return 1;
+    }
+
+    cl_kernel krn = clCreateKernel(prog, "check_shared", &err);
+    CHECK(err, "clCreateKernel");
+
+    const size_t N = 8;
+    size_t bufSize = N * sizeof(int);
+
+    // Allocate host-visible buffers
+    cl_mem bufIn = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
+    CHECK(err, "clCreateBuffer input");
+    cl_mem bufOut = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
+    CHECK(err, "clCreateBuffer output");
+
+    // Map the buffer (should return pointer to real host memory if unified)
+    int* hostPtr = (int*)clEnqueueMapBuffer(q, bufIn, CL_TRUE, CL_MAP_WRITE, 0, bufSize, 0, nullptr, nullptr, &err);
+    CHECK(err, "clEnqueueMapBuffer");
+
+    std::cout << "Mapped host pointer: " << static_cast<void*>(hostPtr) << "\n";
+
+    // Write pattern 100, 101, ... directly into mapped memory
+    for (size_t i = 0; i < N; ++i)
+        hostPtr[i] = static_cast<int>(100 + i);
+
+    // No clEnqueueWriteBuffer call!  We rely on shared memory.
+    err = clEnqueueUnmapMemObject(q, bufIn, hostPtr, 0, nullptr, nullptr);
+    CHECK(err, "unmap input");
+    clFinish(q);
+
+    // Set kernel args
+    err = clSetKernelArg(krn, 0, sizeof(cl_mem), &bufIn);
+    CHECK(err, "clSetKernelArg 0");
+    err = clSetKernelArg(krn, 1, sizeof(cl_mem), &bufOut);
+    CHECK(err, "clSetKernelArg 1");
+
+    size_t global = N;
+    err = clEnqueueNDRangeKernel(q, krn, 1, nullptr, &global, nullptr, 0, nullptr, nullptr);
+    CHECK(err, "clEnqueueNDRangeKernel");
+    clFinish(q);
+
+    // Read back result
+    int* outPtr = (int*)clEnqueueMapBuffer(q, bufOut, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, nullptr, &err);
+    CHECK(err, "map output");
+
+    std::cout << "Result: ";
+    for (size_t i = 0; i < N; ++i)
+        std::cout << outPtr[i] << " ";
+    std::cout << "\n";
+
+    // Validate: kernel adds 42 to the 100+i pattern
+    bool ok = true;
+    for (size_t i = 0; i < N; ++i)
+        if (outPtr[i] != static_cast<int>(142 + i)) ok = false;
+
+    std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
+                     : "❌ GPU did not see host writes (copying or staging occurred)\n");
+
+    err = clEnqueueUnmapMemObject(q, bufOut, outPtr, 0, nullptr, nullptr);
+    CHECK(err, "unmap output");
+    clFinish(q);
+
+    clReleaseMemObject(bufIn);
+    clReleaseMemObject(bufOut);
+    clReleaseKernel(krn);
+    clReleaseProgram(prog);
+    clReleaseCommandQueue(q);
+    clReleaseContext(ctx);
+    return 0;
+}
@@ -0,0 +1,13 @@
+add_subdirectory(bodies)
+
+add_daps_target(all_device_specs
+    SOURCES
+        avia0.dapss
+        win0.dapss
+)
+
+# Register this target for later dependency addition from main CMakeLists.txt
+register_daps_target(all_device_specs)
+# Group the DAPSS target under the "devices" folder in IDE project views.
+# NOTE: FOLDER is purely organizational; it does not add the target to ALL.
+set_property(TARGET all_device_specs PROPERTY FOLDER "devices")
@@ -0,0 +1,3 @@
+edev|avia0|mesh()|livoxGen1()|livoxProto1()|3JEDK380010Z39||
+edev|avia0|pcloudIntensity()|livoxGen1()|livoxProto1()|3JEDK380010Z39||
+edev|avia0|pcloudAmbience()|livoxGen1()|livoxProto1()|3JEDK380010Z39
@@ -0,0 +1,17 @@
+add_daps_target(body_rpi5_persys
+    SOURCES
+        rpi5-persys.dapss
+)
+
+add_daps_target(body_dell_laptop
+	SOURCES
+		dell-laptop.dapss
+)
+
+# Register this target for later dependency addition from main CMakeLists.txt
+register_daps_target(body_rpi5_persys)
+register_daps_target(body_dell_laptop)
+# Group the DAPSS targets under "devices/bodies" in IDE project views.
+# NOTE: FOLDER is purely organizational; it does not add the targets to ALL.
+set_property(TARGET body_rpi5_persys PROPERTY FOLDER "devices/bodies")
+set_property(TARGET body_dell_laptop PROPERTY FOLDER "devices/bodies")
@@ -0,0 +1,3 @@
+#include "../win0.dapss"
+||
+#include "../avia0.dapss"
@@ -0,0 +1,3 @@
+#include "../win0.dapss"
+||
+#include "../avia0.dapss"
@@ -0,0 +1 @@
+edev|win0|visual-qualeiface()|xcb(dev-substring)|xorg(display=1|screen=0)|mut
@@ -1,10 +1,11 @@
-# DeviceSpec: API `drm()` from server `linux()`:
+# DeviceSpec: stim-buff-api `drm()` from provider `linux()`:

-The API is Linux DRM/KMS. The server is Linux itself. This server provides
-direct capture of frames at the kernel so it works for both Linux and Wayland.
-There's a program known as GPU Screen Recorder that is able to use this to
-capture specific windows on X11, but the window-specific capture doesn't work
-with Wayland. Irrespective, whole-screen capture works on both GFX servers.
+The stim-buff-api is Linux DRM/KMS. The provider is Linux itself. This provider
+enables direct capture of frames at the kernel level, so it works for both X11
+and Wayland. There is a program called GPU Screen Recorder that can use this
+API to capture specific windows on X11, but window-specific capture does not
+work with Wayland. However, whole-screen capture works on both graphics
+servers.

 Notes:
 * `modetest` utility in ubuntu package `libdrm-tests` is relevant.
@@ -1,24 +1,25 @@
-# DeviceSpec: API `xcb`, provider `xorg`
+# DAP Spec: stim-buff-api `xcb`, provider `xorg`

 ## Overview

-The `xcb` API with the `xorg` provider allows Salmanoff to interact with Xorg
+The `xcb` stim-buff-api with the `xorg` provider allows Salmanoff to interact with Xorg
 server windows. This can be used to capture visual data from specific windows 
 or entire screens managed by the Xorg server.

-## DeviceSpec Format
+## DAP Spec Format

-The general format of a device-spec for the `xcb` API with the `xorg` provider 
+The general format of a DAP spec for the `xcb` stim-buff-api with the `xorg` provider 
 is:
 ```
-sensor-type|implexor|xcb(api-params)|xorg(provider-params)|deviceSelector
+sensor-type|dev-identifier|quale-iface-api|xcb(stim-buff-api-params)|xorg(provider-params)|deviceSelector
 ```

 * `sensor-type` is always either '`idev`' (interoceptor), '`edev`' 
   (extrospector), or '`adev`' (actuator).
-* `implexor` is the name of the implexor algorithm that should be used with 
-  the data that is provided by the `provider` via the `api`.
-* `api` is `xcb` in this case, and the `api-params` in parentheses may be 
+* `dev-identifier` is a user-defined name for this specific device instance.
+* `quale-iface-api` is the name of the StimIface library that should be used to 
+  process the data from the stim buffer.
+* `stim-buff-api` is `xcb` in this case, and the `stim-buff-api-params` in parentheses may be 
  omitted, in which case the parentheses will be empty, but the parentheses 
  must always be written out.
 * `provider` is `xorg` in this case, and the `provider-params` in parentheses 
@@ -28,11 +29,11 @@ sensor-type|implexor|xcb(api-params)|xorg(provider-params)|deviceSelector
  identify the specific window or screen you want to access via that 
  `provider`.

-## `api-params` and `provider-params`
+## `stim-buff-api-params` and `provider-params`

-### `api-params`
+### `stim-buff-api-params`

-The `api-params` for the `xcb` API can include the following:
+The `stim-buff-api-params` for the `xcb` stim-buff-api can include the following:

 * `dev-id` or `devid`: Specifies that the `deviceSelector` is a numeric window 
  ID. The ID can be specified in decimal or hexadecimal format.
@@ -97,25 +98,25 @@ xcb(dev-string)|My\ Exact\ Window\ Name

 ### To attach a specific window by name (substring match):
 ```
-+edev|visual-implexor|xcb(dev-substring)|xorg(display=0|screen=0)|my-window
+edev|my-window|visual-qualeiface|xcb(dev-substring)|xorg(display=0|screen=0)|my-window
 ```
 This will attach to a window whose name contains "my-window" as a substring.

 ### To attach a specific window by exact name:
 ```
-+edev|visual-implexor|xcb(dev-string)|xorg(display=0|screen=0)|My\ Exact\ Window\ Name
+edev|my-window|visual-qualeiface|xcb(dev-string)|xorg(display=0|screen=0)|My\ Exact\ Window\ Name
 ```
 This will attach to a window whose name exactly matches "My Exact Window Name".

 ### To attach a specific window by numeric ID:
 ```
-+edev|visual-implexor|xcb(dev-id)|xorg(display=0|screen=0)|123456
+edev|my-window|visual-qualeiface|xcb(dev-id)|xorg(display=0|screen=0)|123456
 ```
 This will attach to a window with the numeric ID `123456`.

 ### To attach the entire screen:
 ```
-+edev|visual-implexor|xcb()|xorg(display=0|screen=0)|all
+edev|my-screen|visual-qualeiface|xcb()|xorg(display=0|screen=0)|all
 ```
 This will attach to the entire screen `0` of display `0`.

@@ -0,0 +1,67 @@
+Ok: I realized I may be able to bridge async sequences without needing to make a bunch of functions async. The basic thing is:
+
+```
+funcThatCallsAsyncFuncsButWhoseSignatureIsItselfSync(ComponentThread &ct)
+{
+  std::atomic continuationCondition(false);
+
+  async_call(ct, [] {
+    // Do stuff that will enqueue events on ct.
+    continuationCondition.store(true);
+     ComponentThread::getSelf()->getIoService()
+      .post([]{});
+  });
+  for (;;)
+  {
+    /* We, the thread actually executing this sequence
+     * here, may not actually be the thread that owns
+     * ct, in which case, ct's owner will dequeue the stuff
+     * that async_call() sent it to do, then conclude its
+     * processing. It may or may not send a message back
+     * to the thread executing this sync sequence here.
+     *
+     * If we are the same thread as ct, then wonderful:
+     * we will dequeue the message ourself and process
+     * it, then conclude the processing. We may or may
+     * not send a message back to ourself at the end of
+     * the processing but it doesn't matter since we'll
+     * have to check the condvar in the loop post-
+     * conditions before the next iteration.
+     *
+     * The problem is the first case where ct is a foreign
+     * thread which may not send us a wakeup message
+     * when condvar has been modified, and we may
+     * hypothetically never get another signal from any
+     * other thread.
+     * This can be solved by just ensuring that this thread
+     * always gives a callback to async_call() which first
+     * modifies condvar, and then sends an empty message
+     * to itself.
+     */
+    ct.run_one();
+    if (continuationCondition.load()==true
+      ||!ct.isRunnable())
+    {
+      break;
+    }
+  }
+
+  /* We've now bridged the async calls into this
+   * sync function's body without losing any
+   * event responsiveness for the main loop.
+   */
+  // Continue executing normally...
+}
+```
+
+This should be a complete solution for async bridging.
+
+Now let's try it first in the sendHandshake sequence. We'll try to use non-blocking socket API calls to send the heartbeat and wait for a response asynchronously. Don't use boost::asio socket functions because they cause a segfault due to a bug (see /CMakeLists.txt). To wait for events on the socket, set up boost to wait on the socket or bind FD.
+
+`async_call` is OBVIOUSLY NOT a function you're expected to implement. It's merely a placeholder for any async sequence we call inside of the sync function.
+
+You're expected to get rid of the io_context auto-scope object inside of executeHandshake, and use the stored Device.componentThread's io_context instead. You are expected to refrain from associating Device.componentThread.io_service with any boost::asio::socket stuff. You will have to write manual posix socket api code, and use a boost::file_descriptor (presumably `boost::asio::posix::stream_descriptor`) to catch wakeup events on the socket from the UDP events like recv-data-ready.
+
+You're basically going to break up executeHandshake into a series of lambdas, but use the pattern I described above to keep it as one synchronous function by bridging it. executeHandshake is the synchronous function that must remain synchronous to its caller. Internally, you must split up executeHandshake into several lambdas, and then at the end you set the condvar and post a message back to ComponentThread::getSelf()->io_service. Then, since executeHandshake() has a bridging sequence that loops and calls run_one() until the condvar is set, executeHandshake will resume executing after that loop exits.
+
+Makes sense?
@@ -1,282 +0,0 @@
-# Adaptive Resource Acquisition with Re-queuing
-
-## Overview
-
-This document describes a novel synchronization pattern that combines the benefits of spinlocks, mutexes, and queuing systems while avoiding their respective drawbacks. The pattern is designed for high-throughput async systems where multiple threads need to coordinate access to shared resources without blocking or wasting CPU cycles.
-
-## Problem Statement
-
-Traditional synchronization mechanisms have significant trade-offs that limit system performance:
-
- **Mutexes**: Block threads, causing context switches and reduced throughput
- **Spinlocks**: Waste CPU cycles while waiting, preventing other work from proceeding
- **Pure Queuing**: Serializes all operations, reducing parallelism unnecessarily
-
-The challenge is to maintain data consistency across multi-segment async operations while maximizing system throughput. In high-performance systems, the overhead of context switching can be substantial, and CPU cycles are precious resources that should not be wasted on busy-waiting.
-
-## Core Concept
-
-The Adaptive Resource Acquisition pattern uses **atomic flags on shared objects** combined with **immediate re-queuing** to achieve optimal performance characteristics:
-
-1. **No thread blocking** - Threads never sleep or context switch, maintaining maximum responsiveness
-2. **No CPU waste** - No busy-waiting when other work could proceed, ensuring efficient resource utilization
-3. **Maximum throughput** - Threads always process available work, maximizing system productivity
-4. **Data consistency** - Atomic resource acquisition preserves integrity without traditional locking overhead
-
-This approach fundamentally changes how we think about resource coordination, treating it as a flow management problem rather than a blocking synchronization problem.
-
-## Architecture
-
-### Resource Objects
-
-Each shared object that requires synchronization carries an atomic flag that indicates its availability. This flag serves as the primary coordination mechanism, allowing threads to atomically claim ownership without the overhead of traditional locks.
-
-The resource object structure is intentionally simple, containing only the essential coordination mechanism and the resource-specific data. This minimalism reduces memory overhead and improves cache locality.
-
-### Request Structure
-
-Async operations are encapsulated as requests that specify their resource requirements and the operation to be performed. This encapsulation allows the system to reason about resource dependencies before attempting execution, enabling intelligent scheduling decisions.
-
-The request structure includes metadata such as priority levels, which can be used for advanced scheduling policies. This flexibility allows the system to adapt to different workload characteristics and business requirements.
-
-### Resource Manager
-
-The core component orchestrates resource acquisition and request processing through a sophisticated coordination mechanism. It maintains a registry of all available resources and manages the flow of requests through the system.
-
-The resource manager operates on a simple principle: attempt to acquire all required resources atomically, and if successful, execute the operation immediately. If any resource is unavailable, the request is immediately re-queued for later processing without any blocking or waiting.
-
-## Algorithm
-
-### Resource Acquisition Process
-
-The resource acquisition process follows a simple but effective strategy. For each request, the system attempts to atomically acquire all required resources in a single pass. This atomicity is crucial for maintaining data consistency and preventing race conditions.
-
-If all resources can be acquired atomically, the operation proceeds immediately. This represents the optimal case where no coordination overhead is incurred beyond the atomic operations themselves. The system achieves maximum throughput in this scenario.
-
-If any resource cannot be acquired, the system immediately releases any resources that were successfully acquired and re-queues the request. This approach ensures that resources are never held unnecessarily and that the system can continue processing other requests without delay.
-
-The key insight is that failed acquisition attempts are not failures in the traditional sense, but rather normal flow control mechanisms. The system treats resource contention as a scheduling opportunity rather than a blocking condition.
-
-#### Atomic Resource Acquisition Pseudocode
-
-```
-TRY_ACQUIRE_RESOURCES(resource_names):
-    acquired_resources = []
-    
-    FOR EACH resource_name IN resource_names:
-        resource = GET_RESOURCE(resource_name)
-        expected_value = false
-        desired_value = true
-        
-        // Atomic compare-and-swap operation
-        IF ATOMIC_COMPARE_EXCHANGE_STRONG(resource.flag, expected_value, desired_value):
-            // Successfully acquired this resource
-            acquired_resources.ADD(resource)
-        ELSE:
-            // Failed to acquire this resource
-            // Release all previously acquired resources
-            FOR EACH acquired_resource IN acquired_resources:
-                ATOMIC_STORE(acquired_resource.flag, false)
-            RETURN false
-    
-    // Successfully acquired all resources
-    RETURN true
-```
-
-### Request Processing Workflow
-
-The request processing workflow is designed for maximum efficiency. Each request is processed exactly once per cycle, either by successful execution or by re-queuing for later processing.
-
-When a request is successfully processed, the system immediately releases all acquired resources, making them available for other requests. This rapid resource turnover maximizes system throughput and minimizes resource contention.
-
-The re-queuing mechanism ensures that no request is lost, while the immediate nature of the re-queuing prevents any blocking or waiting. Requests that cannot be processed immediately simply wait their turn in the queue, allowing other requests to proceed without interference.
-
-#### Basic Processing Algorithm
-
-```
-PROCESS_REQUEST(request):
-    // Step 1: Dequeue the request
-    request = DEQUEUE_FROM_QUEUE()
-    
-    // Step 2: Attempt atomic resource acquisition
-    resources_acquired = []
-    acquisition_successful = true
-    
-    FOR EACH resource_name IN request.required_resources:
-        resource = GET_RESOURCE(resource_name)
-        IF ATOMIC_COMPARE_EXCHANGE(resource.flag, false, true):
-            resources_acquired.ADD(resource)
-        ELSE:
-            acquisition_successful = false
-            BREAK
-    
-    // Step 3: Handle acquisition result
-    IF acquisition_successful:
-        // Execute the operation
-        EXECUTE_OPERATION(request.operation)
-        
-        // Release all acquired resources
-        FOR EACH resource IN resources_acquired:
-            ATOMIC_STORE(resource.flag, false)
-    ELSE:
-        // Release any partially acquired resources
-        FOR EACH resource IN resources_acquired:
-            ATOMIC_STORE(resource.flag, false)
-        
-        // Re-queue the request for later processing
-        ENQUEUE_REQUEST(request)
-```
-
-### Event Loop Management
-
-The event loop continuously processes requests from the queue until no more requests are available. This simple loop structure ensures that the system is always making progress on available work.
-
-The loop processes requests in the order they were queued, providing a natural fairness mechanism. However, the system can be extended with priority queuing or other scheduling policies to meet specific requirements.
-
-The event loop is designed to be efficient and non-blocking, ensuring that the system remains responsive even under high load conditions.
-
-#### Main Event Loop Pseudocode
-
-```
-MAIN_EVENT_LOOP():
-    WHILE true:
-        // Check if there are requests to process
-        IF QUEUE_IS_EMPTY():
-            BREAK
-        
-        // Dequeue the next request
-        request = DEQUEUE_FROM_QUEUE()
-        
-        // Process the request (this includes re-queuing if needed)
-        PROCESS_REQUEST(request)
-        
-        // Continue with next request
-        CONTINUE
-```
-
-#### Multi-threaded Worker Loop
-
-```
-WORKER_THREAD():
-    WHILE true:
-        // Wait for work to become available
-        request = WAIT_FOR_REQUEST()
-        
-        // Process the request
-        PROCESS_REQUEST(request)
-        
-        // Return to waiting state
-        CONTINUE
-```
-
-## Multi-Threaded Implementation
-
-### Thread-Safe Coordination
-
-In a multi-threaded environment, the resource manager must coordinate access to its internal data structures while maintaining the non-blocking characteristics of the pattern. This coordination is achieved through careful use of atomic operations and minimal locking.
-
-The queue management uses traditional mutex-based synchronization, but only for the queue operations themselves. The critical resource acquisition path remains lock-free, ensuring that the performance benefits of the pattern are preserved.
-
-Worker threads continuously process requests from the shared queue, attempting to acquire resources and execute operations. The coordination between threads is handled implicitly through the atomic resource flags, eliminating the need for explicit thread synchronization in the critical path.
-
-### Worker Thread Behavior
-
-Worker threads operate in a continuous loop, processing requests as they become available. Each thread independently attempts to acquire resources and execute operations, creating natural parallelism without explicit coordination.
-
-The worker threads are designed to be lightweight and efficient, with minimal overhead beyond the actual resource acquisition and operation execution. This design allows the system to scale effectively with the number of available CPU cores.
-
-The thread coordination is handled through the shared queue and atomic resource flags, creating a self-balancing system that naturally distributes work across available threads.
-
-## Use Cases
-
-### Device Management Systems
-
-In device management systems, multiple operations may need to coordinate access to physical or logical devices. The adaptive resource acquisition pattern provides an elegant solution for managing these complex coordination requirements.
-
-For example, when attaching a device, the system may need to coordinate access to the device itself, the device registry, and various system resources. The pattern allows these operations to proceed atomically when resources are available, while gracefully handling contention through re-queuing.
-
-The device management system can handle complex multi-step operations that require coordination across multiple resources, all while maintaining high throughput and responsiveness.
-
-### Database Connection Pools
-
-Database connection pools are a natural fit for the adaptive resource acquisition pattern. Each database operation requires access to a connection from the pool, and the pattern provides efficient coordination without the overhead of traditional locking.
-
-The pattern allows the system to process multiple database operations concurrently when connections are available, while gracefully handling periods of high contention. The re-queuing mechanism ensures that no operations are lost, even during peak load periods.
-
-The connection pool can implement sophisticated scheduling policies, such as priority queuing for different types of operations, while maintaining the performance benefits of the pattern.
-
-## Performance Characteristics
-
-### Throughput Analysis
-
-The performance characteristics of the adaptive resource acquisition pattern are determined by the resource contention patterns in the system. In the best case, when resources are readily available, the system achieves maximum throughput with minimal overhead.
-
-In the worst case, when resources are heavily contended, the system gracefully degrades to a queuing behavior, ensuring that all operations eventually complete. The system maintains fairness and prevents starvation through the natural ordering of the queue.
-
-The average case performance represents the typical operating conditions, where the system achieves optimal parallelism while handling occasional resource contention through re-queuing.
-
-### Comparison with Traditional Methods
-
-The adaptive resource acquisition pattern provides a unique combination of performance characteristics that are not achievable with traditional synchronization mechanisms:
-
- **Mutexes** provide data consistency but at the cost of thread blocking and context switching overhead
- **Spinlocks** avoid context switching but waste CPU cycles during contention
- **Pure queuing** avoids both blocking and CPU waste but serializes operations unnecessarily
-
-The adaptive pattern combines the best aspects of these approaches while avoiding their drawbacks, creating a solution that is both efficient and practical.
-
-## Advanced Features
-
-### Priority Queuing
-
-The system can be extended with priority queuing to handle different types of operations with varying importance. High-priority operations can be processed before lower-priority operations, ensuring that critical operations receive timely attention.
-
-The priority queuing mechanism integrates seamlessly with the existing re-queuing behavior, allowing the system to maintain its performance characteristics while providing sophisticated scheduling capabilities.
-
-### Resource Groups
-
-Complex operations may require coordination across multiple related resources. Resource groups allow the system to treat related resources as a single unit for acquisition purposes, simplifying the coordination logic for complex operations.
-
-Resource groups can be used to implement sophisticated resource management policies, such as ensuring that related resources are always acquired together or implementing resource reservation mechanisms.
-
-### Fairness Mechanisms
-
-The system can implement various fairness mechanisms to ensure that all requests receive fair treatment over time. Round-robin processing, aging mechanisms, and other fairness policies can be implemented while maintaining the performance benefits of the pattern.
-
-Fairness mechanisms are particularly important in systems where different types of operations have different resource requirements, ensuring that no operation type dominates the system resources.
-
-## Implementation Considerations
-
-### Memory Management
-
-The pattern requires careful attention to memory management, particularly for the request objects and resource metadata. Smart pointers and object pooling can be used to minimize memory allocation overhead and improve performance.
-
-The system should implement proper cleanup mechanisms for failed operations and ensure that resources are always released, even in error conditions.
-
-### Error Handling
-
-Robust error handling is essential for maintaining system reliability. The system should gracefully handle operation failures, resource unavailability, and other error conditions without affecting the overall system performance.
-
-Retry mechanisms with exponential backoff can be implemented for transient failures, while deadlock detection and resolution mechanisms can handle more complex failure scenarios.
-
-### Monitoring and Debugging
-
-The system should provide comprehensive monitoring capabilities to track performance metrics, resource utilization, and queue behavior. These metrics are essential for tuning the system and identifying performance bottlenecks.
-
-Debugging support should include detailed logging of resource acquisition attempts, queue operations, and operation execution, allowing developers to understand and optimize system behavior.
-
-## Conclusion
-
-The Adaptive Resource Acquisition pattern provides a novel solution to the classic synchronization dilemma. By combining atomic operations with intelligent re-queuing, it achieves maximum throughput while maintaining data consistency and avoiding the overhead of traditional synchronization mechanisms.
-
-This pattern is particularly well-suited for high-performance async systems where traditional synchronization mechanisms would create unacceptable overhead. The pattern's simplicity and effectiveness make it a valuable addition to the toolkit of concurrent programming patterns.
-
-The pattern represents a fundamental shift in how we think about resource coordination, treating it as a flow management problem rather than a blocking synchronization problem. This shift enables new levels of performance and scalability in concurrent systems.
-
-The adaptive resource acquisition pattern is particularly valuable in:
- High-performance async systems where throughput is critical
- Resource-constrained environments where CPU cycles are precious
- Systems requiring predictable latency and responsiveness
- Multi-threaded applications with complex shared state requirements
-
-By providing a practical solution to the synchronization dilemma, this pattern enables developers to build high-performance concurrent systems without sacrificing simplicity or reliability.
@@ -0,0 +1,17 @@
+# Postrin path design:
+
+## Negtrin and postrin weighting:
+
+I am skeptical that treating negtrins and postrins as having equal importance
+will produce a working system.
+
+### Frustrator Negtrin model: Postrins as intrinsically desirable:
+
+In the frustrator model, postrins are intrinsically desirable and negtrins are
+only grasped as important in that they forcibly direct Drctr's attention away
+from its sampling/pursuit of a postrin. Because of this they're grasped as being
+bad because they frustrate the intrinsic goal of pursuing/sampling a postrin.
+
+### Equiprioritized intrin model:
+
+In this model 
@@ -0,0 +1,603 @@
+I just realized that my spinqueueing mechanism is highly power inefficient if a
+lock needs to be held across a "true async wait"—where the async sequence
+actually waits on a hardware bottleneck. In this case, the thread acquires the
+spinlock, then goes to sleep in the kernel schedq until some hardware event
+occurs, and is then awakened—all while still holding the spinlock.
+
+Meanwhile, other sequences running on other threads and contending for that lock
+will be Qspinning. This is acceptable if all I care about is maximum throughput:
+the Qspinning just re-posts the sequences back into the Q, and eventually
+they'll acquire the LockSet and proceed.
+
+Importantly, since the thread itself isn't slept in the kschedQ, it will be
+deQing and processing other sequences that aren't bottlenecked on the lock held
+by the sequence waiting for the hardware response. Throughput is indeed
+maximized.
+
+However, I just realized that if kernel mutexes expose FD events, I can apply
+this same logic to sleeplocks: I can wait on the sleeplocks asynchronously
+instead of synchronously. If I can make my asio::io_service wait on all the
+mutex FDs requested by sequences on the current thread, then in theory I can put
+the thread to sleep and know that when the mutex becomes available, I'll be
+awakened again.
+
+Hence, I can get the best of both worlds: maximum throughput and power saving.
+Instead of spinqueueing, we just add the lock FDs to an FD set to be waited on
+by asio. If any of those locks become available, the kernel scheduler will
+awaken our asioQ thread, and we can then awaken and retry the lock.
+
+## Boost asio queue-based sleep locking:
+
+Instead of using FDs, we can also try to use a fifo Q based mechanism: each lock
+is a spinlock and a fifo queue.
+
+Acquire:
+```
+lock(spinlock);
+q.push_back(self);
+head = q.peek_front();
+if (head == self) {
+	// We acquired the lock.
+	unlock(spinlock);
+	return;
+}
+
+unlock(spinlock);
+```
+
+release:
+```
+lock(spinlock);
+// Should get back ourself.
+q.pop_front();
+// Wake up the next request in the q.
+head = q.peek_front();
+if (head == NULL) {
+	// Nobody was waiting.
+	unlock(spinlock);
+	return;
+}
+
+head.thread_to_wake.getIoService().post([]{
+	// This lambda causes thread_to_wake to check this lock's
+	// Q and then proceed to execute since it now owns the lock.
+});
+unlock(spinlock);
+```
+
+Something like this: it causes the entire thing to be, at least ostensibly,
+in userspace -- though idk how Boost handles its queues internally.
+
+## Prioritizing LockSets:
+
+One problem we have with a FIFO-based sleeping system is that it makes it very
+unlikely that LockSets will ever acquire all of their locks, if there are
+contenders for those same locks who only need to acquire one of the locks in
+that LockSet.
+
+We could theoretically give locksets an advantage by not making them backout
+if they fail to acquire all locks in their set. I.e: if they get 2/3, then they
+hold those 2 and then wait for the 3rd. This is problematic because it leaves
+room open for deadlocks in the form of T1 and T2 needing both LockA and LockB,
+but they acquire them in reverse order. I.e: T1 takes LockA and now waits for
+LockB; and T2 takes LockB and now waits for LockA. This will now happen among
+the LockSets if we don't impose backing out. It may be possible to avoid this
+using very careful lock ordering and dependency analysis, but this project is
+asynchronous: the locking is done in the async sequences and not in the sync
+accessor functions. So this kind of analysis is almost impossible to do.
+
+
+We need to think of a way to make the FIFOs biased toward LockSets so that they
+have an advantage over single-lock acquirers. Or else LockSet sequences will be
+starved.
+
+### Timed backoff:
+
+We could have Locksets be greedy and try to hold on to the locks they've
+acquired (say, 2/3 and then wait for the 3rd) but then be forced to backoff
+after a timeout.
+
+This introduces async event complexity and also the timeout we choose is almost
+guaranteed to be arbitrary.
+
+### Fractionally inserted FIFOs:
+
+We insert sequences with a LockSet.size() of 1, at the back.
+We insert all other sequences (>1) into the first 1/LockSet.size()th position
+in the Queue.
+So a Lockset of size 2 will be inserted at the end of the first half of the
+items in the queue.
+A Lockset of size 3 will be inserted at the end of the first 33% of items.
+A lockset of size 4 will be inserted at the end of the first 25% of items.
+And so on.
+
+This ensures that higher LockSet.size()s will be prioritized ever higher, and
+at the same time they don't completely hog everything. Those single-lock
+sequences that have already naturally progressed past the fraction-mark of a
+given LockSet size will continue making progress toward the front.
+
+For queueing sequences with Locksets>1, we can enQ them on the FIFO of the first
+lock in their set. They'll back off each time anyway, so they'll always be
+re-trying from the first lock in their set each time.
+
+#### Impl details:
+
+We'd like to use std::unordered_set because insertion will require lots of
+moving items around, but we'll have to use std::vector because we need direct
+access to insert at arbitrary fractional indexes. It's unlikely the number of
+items in any lock's Q will ever be large enough to require lots of displacement,
+but welp there's no reason not to plan for scaling. Although if we end up
+needing scaling that's a symptom of a bigger problem...with scaling itself lol.
+There shouldn't be enough items blocked on a lock that we have to design the
+lock's queue to be scalable.
+
+### Inverted Fractionally acquired locksets:
+
+The previous idea of fractionally inserted lockQs was okay, but the acquisition
+algo required that the async seq be at the front of a lock's queue to
+successfully acquire that lock. That makes it almost impossible for Locksets>1
+to ever acquire all of their locks. If we add backoff to that, it basically
+means no lockset will ever acquire all of its locks.
+
+Instead what we now do is always insert at the rear (push_back()) and then when
+acquiring, we check to see if the sequence is in the first
+(1 - 1/LockSet.size()) fraction of the queue, and if so, it acquires the lock.
+I.e: if the item isn't in the LAST 1/LockSet.size() of the items, it succeeds.
+* For a lockset of size=1: It must be at the front of the queue.
+* Lockset.size=2: it must be in the first 50% of items.
+* Lockset.size=3: it must be in the first 66% of items.
+* Lockset.size=4: It must be in the first 75% of items.
+
+So this way larger LockSets are favoured, but 1-size locksets make progress.
+
+For performance:
+* We obv can just scan the smaller tail percentage for the item instead of
+  scanning the larger front percentage.
+* If we use a doubly-linked list, we can prolly keep the insertion iterator
+  and this way we won't have to actually find the item in the lockQ when we wish
+  to eventually remove it from the lockQ when releasing the lock.
+
+## Total overall design:
+
+### Asio queues and Lockvokers:
+
+Lockvokers are initially enqueued on a CompThread's queue. When the lockvoker
+first runs, it checks a flag to see if it has been "registered" into the queues
+for all locks in its set. If not, then it "registers" itself in each lock's
+ticketQ and then attempts to acquire each lock. Registration and acquisition
+are logically separate operations; and lockvokers will often attempt acquisition
+many times after first registering, without needing to register again. Ideally
+we can implement a LockSet::registerAndTryAcquireAll() method, but that's for
+us to think about later.
+
+```
+/* We'll need to rename current class LockSpec to LockSet. */
+class LockSet
+{
+	/* Add this either inside of LockSet or outside of it -- depends on whether
+	 * we can get it to compile because I'm seeing some potential circular
+	 * definition dependencies.
+	 */
+	typedef std::pair<Qutex, LockerAndInvokerList::iterator>
+		LockUsageDesc;
+
+	/* Find a LockUsageDesc -- useful below */
+	LockUsageDesc &getLockUsageDesc(Qutex &criterionLock)
+	{
+		for (auto &reqLock: requiredLocks) {
+			if (reqLock.first == &criterionLock) { return reqLock; }
+		}
+
+		// Should never happen.
+		throw;
+	}
+};
+
+LockSet::register(LockerAndInvoker &lockvoker)
+{
+	for (auto &lock: lockset.locks) {
+		// Register the Lockvoker object in each lock's ticketQ.
+		lock.second = lock.first.register(lockvoker);
+	}
+	registered = true;
+}
+
+bool LockSet::tryAcquire(LockerAndInvoker &lockvoker)
+{
+	if (!registered) {
+		// Should never happen.
+		throw ...;
+	}
+	int nLocksAcquired=0,
+		nLocksInSet = lockset.size();
+	for (auto &lock: lockset.locks) {
+		if (!lock.first.tryAcquire(nLocksInSet)) {
+			break;
+		}
+
+		nLocksAcquired++;
+	}
+
+	if (nLocksAcquired == nLocksInSet) {
+		// Success
+		return true;
+	}
+
+	for (int i=0; i<nLocksAcquired; i++) {
+		// Backoff does different stuff from release();
+		locks[i].first.backoff(lockvoker);
+	}
+}
+
+LockSet::release()
+{
+	for (auto &lock: requiredLocks) {
+		lock.release();
+	}
+}
+```
+
+Now, the Qutex class is what we'll use for synchronization. It's just a
+combination of a SpinLock, a sh_ptr<LockerAndInvoker> and a std::list.
+
+```
+class SpinLock
+{
+	/* Modify to add methods acquire() and release() which busy-wait.
+	 */
+	void acquire();
+	void release();
+};
+
+class LockSet
+{
+	/* Modify the std::vector of SpinLock to instead be:
+	 *	std::vector<LockUsageDesc> locks;
+	 */
+	std::vector<LockUsageDesc> locks;
+}
+
+bool LockerAndInvoker::operator==(const LockerAndInvoker &other)
+{
+	/* Compare by the address of the continuation objects. Why?
+	 * Because there's no guarantee that the lockvoker object that was
+	 * passed in by the io_service invocation is the same object as that
+	 * which is in the qutexQs. Especially because we make_shared() a
+	 * copy when registerInQutexQueues()ing.
+	 *
+	 * Generally when we "wake" a lockvoker by enqueuing it, boost's
+	 * io_service::post will copy the lockvoker object.
+	 */
+	return &this->serializedContinuation == &other.serializedContinuation;
+}
+
+bool LockerAndInvoker::operator !=(const LockerAndInvoker &other)
+{
+	return &this->serializedContinuation != &other.serializedContinuation;
+}
+
+class Qutex
+{
+public:
+	typedef std::list<LockerAndInvoker>		LockerAndInvokerList;
+
+	LockerAndInvokerList::iterator register(const LockerAndInvoker &lockvoker)
+	{
+		/** EXPLANATION:
+		 * Just insert the lockvoker into the rear of the list.
+		 *
+		 * Then, since we want to store the iterator for later erasure, return it.
+		 */
+		LockerAndInvokerList::iterator it;
+
+		lock.acquire();
+		queue.push_back(lockvoker);
+		it = queue.end();
+		--it;
+		lock.release();
+
+		return it;
+	}
+
+	void unregister(LockerAndInvokerList::iterator it, bool shouldLock=1)
+	{
+		if (shouldLock)
+		{
+			lock.acquire();
+			queue.erase(it);
+			lock.release();
+		}
+		else{
+			queue.erase(it);
+		}
+	}
+
+	bool tryAcquire(LockerAndInvoker &tryingLockvoker)
+	{
+		const nRequiredLocks = tryingLockvoker.serializedContinuation
+			.requiredLocks.size();
+
+		lock.acquire();
+
+		const qNItems = queue.size();
+
+		if (qNItems < 1) {
+			lock.release();
+
+			/**	EXPLANATION:
+			 * Lockvokers register in all of their requiredLocks' queues before
+			 * tryAcquire()ing them, so a tryAcquire() call implies queue.size() > 0.
+			 *
+			 * Ergo this should never happen.
+			 */
+			throw;
+		}
+
+		if (!!currentOwner) {
+			lock.release();
+			return false;
+		}
+
+		/**	EXPLANATION:
+		 * From here:
+		 * if qNItems == 1 then we are the only one in the ticketQ and we have
+		 *	successfully acquired the lock.
+		 * If qNItems / nRequiredLocks == 0, then we acquire by default since
+		 *	the number of items in the ticketQ guarantees that we are in the top
+		 *	X% for that nRequiredLocks.
+		 * If qNItems / nRequiredLocks >= 1, then we must do the normal algo:
+		 *	Check the last (qNItems/nRequiredLocks) items, and if the item isn't
+		 *	in those items, then it must be in the earlier ones (obviously).
+		 *	Hence this Lockvoker acquisition should be considered successful.
+		 *
+		 *	EXPLANATION 2:
+		 * You'll notice that we don't do actual percentages but rather we just
+		 * do discrete fractions -- this makes the algo more deterministic
+		 * and much easier to reason about. I.e:
+		 *	If nRequiredLocks is 6 and qNItems==3:
+		 *		we don't actually calculate that the Lockvoker item must be in
+		 *		the top (100-17%), and then try to calculate whether we ought to
+		 *		consider the 3rd item to be in the last 17-percentile. We just
+		 *		do a fractional count and assume complete discreteness.
+		 */
+		const int nRearItemsToScan = qNItems / nRequiredLocks;
+
+		if (qNItems == 1 || nRearItemsToScan < 1) {
+			currOwner = tryingLockvoker;
+			lock.release();
+			return true;
+		}
+
+		/**	EXPLANATION:
+		 * For lockvokers that only have 1 requiredLock, they must be at the
+		 * front of the queue to successfully acquire.
+		 */
+		if (nRequiredLocks == 1)
+		{
+			bool ret=false;
+
+			if (tryingLockvoker == &queue.front())
+			{
+				currOwner = tryingLockvoker;
+				ret = true;
+			}
+
+			ret = false;
+			lock.release();
+			return ret;
+		}
+
+		auto rIt = queue.rbegin();
+		auto rEndIt = queue.rend();
+		bool foundInRear = false;
+		for (int i=0; i<nRearItemsToScan && rIt != rEndIt; rIt++, i++)
+		{
+			if (*rIt != tryingLockvoker) { continue; }
+
+			foundInRear = true;
+			break;
+		}
+
+		if (foundInRear) {
+			lock.release();
+			return false;
+		}
+
+		/* Not found in rear: this means the item is in the top X%. That means
+		 * it should be allowed to claim the lock.
+		 */
+		currOwner = tryingLockvoker;
+		lock.release();
+		return true;
+	}
+
+	backoff(LockerAndInvoker &failedAcquirer)
+	{
+		lock.acquire();
+
+		const int nQItems = queue.size();
+		// Rotate queue members if failedAcquirer is at front of queue.
+		LockerAndInvoker &currFront = queue.front();
+		if (currFront == failedAcquirer && nQItems > 1)
+		{
+			/**	EXPLANATION:
+			 * Rotate the top LockSet.size() items in the queue by moving
+			 * the failedAcquirer to the last position in the top
+			 * LockSet.size() items within the queue.
+			 *
+			 * I.e: if queue.size()==20, and lockSet.size()==5, then move
+			 * failedAcquirer from the front to the 5th position in the queue,
+			 * which should push the other 4 items forward.
+			 * If queue.size==3 and LockSet.size()==5, then just
+			 * push_back(failedAcquirer).
+			 *
+			 * It is impossible for a Qutex queue to have only one
+			 * item in it, yet for that Lockvoker item to have failed to
+			 * acquire the Qutex. Being the only item in the ticketQ
+			 * means that you must succeed at acquiring the Qutex.
+			 */
+			int indexOfItemToInsertCurrFrontBehind = min(
+				nQItems - 1,
+				failedAcquirer.serializedContinuation.requiredLocks.size() - 1);
+
+			/*	EXPLANATION:
+			 * Rotate them here.
+			 *
+			 * The reason why we do this rotation is to avoid a particular kind
+			 * of deadlock wherein a grid of async requests is perfectly
+			 * configured so as to guarantee that none of them can make any
+			 * forward progress unless they get reordered.
+			 *
+			 * Consider 2 different locks with 2 different items in them
+			 * each, both of which come from 2 particular requests:
+			 *	Qutex1: Lockvoker1, Lv2
+			 *	Qutex2: Lv2, Lv1
+			 *
+			 * Moreover, both of these lockvokers have requiredLocks.size()==2,
+			 * and the particular 2 locks that each one requires are indeed
+			 * Qutex1 and Qutex2.
+			 *
+			 * This particular setup basically means that in Qutex1's queue, Lv1
+			 * will wake up since it's at the front of Qutex1. It'll successfully
+			 * acquire Qutex1 (since it's at the front), and then it'll try to
+			 * acquire Qutex2. But since Lv1 isn't in the top 50% of items in
+			 * Qutex2's queue, Lv1 will fail to acquire Qutex2.
+			 *
+			 * Then similarly, in Qutex2's queue, Lv2 will wake up since it's at
+			 * the front. Again, it'll successfully acquire Qutex2 since it's at
+			 * the front of Qutex2's queue. But then it'll try to acquire Qutex1.
+			 * Since it's not in the top 50% of Qutex1's enqueued items, it'll
+			 * fail to acquire Qutex1.
+			 *
+			 * N.B: This type of perfectly ordered deadlock can occur in any
+			 * kind of NxN situation where ticketQ.size()==requiredLocks.size().
+			 * That could be 4x4, 5x5, 6x6, etc. It doesn't happen in 1x1
+			 * because a Lockvoker that only requires one lock will always just
+			 * succeed if it's at the front of its queue.
+			 *
+			 * This state of affairs is stable and will persist unless these
+			 * queues are reordered in some way. Hence: that's why we rotate the
+			 * items in a QutexQ after backing off of it. Backing off means
+			 * not necessarily that the calling LockVoker failed to acquire
+			 * THIS PARTICULAR Qutex, but rather that it failed to acquire
+			 * ALL of its required locks.
+			 *
+			 * Hence, if we are backing out, we should also rotate the items
+			 * in the queue if the current front item is the failed acquirer.
+			 * So that's why we do this rotation here.
+			 */
+			// The first arg (the iterator) is a ref in case it must be updated.
+			rotate(
+				currFront.serializedContinuation.requiredLocks.getLockDesc(
+					*this).second,
+				indexOfItemToInsertCurrFrontBehind);
+		}
+
+		currOwner.release();
+
+		LockerAndInvoker &newFront = queue.front();
+
+		lock.release();
+
+		/**	EXPLANATION:
+		 * Why should this never happen? Well, if we were at the front of the queue
+		 * and we failed to acquire the lock, we should have been rotated away from
+		 * the front. On the other hand, if we were not at the front of the queue
+		 * and we failed to acquire the lock, then we weren't at the front of the
+		 * queue to begin with.
+		 * The exception is if the queue has only one item in it.
+		 *
+		 * Hence there ought to be no way for the failedAcquirer to be at the front
+		 * of the queue at this point UNLESS the queue has only one item in it.
+		 */
+		if (newFront == failedAcquirer && nQItems > 1)
+		{
+			throw;
+		}
+
+		/**	EXPLANATION:
+		 * We should always awaken whoever is at the front of the queue, even if
+		 * we didn't rotate. Why? Consider this scenario:
+		 *
+		 *	Lv1 has LockSet.size==1. Lv2 has LockSet.size==3.
+		 *	Lv1's required lock overlaps with Lv2's set of 3 required locks.
+		 *	Lv1 registers itself in its 1 qutex's queue.
+		 *	Lv2 registers itself in all 3 of its qutexes' queues.
+		 *	Lv2 acquires the lock that it needs in common with Lv1.
+		 *		(Assume that Lv2 was not at the front of the common qutex's
+		 *		internal queue -- it only needed to be in the top 66%.)
+		 *	Lv1 tries to acquire the common lock and fails. It gets taken off of
+		 *		its io_service. It's now asleep until it gets
+		 *		re-added into an io_service.
+		 *	Lv2 fails to acquire the other 2 locks it needs and backoff()s from
+		 *		the common lock it shares with Lv1.
+		 *
+		 *	If Lv2 does NOT awaken the item at the front of the common lock's
+		 *	queue (aka: Lv1), then Lv1 is doomed to never wake up again.
+		 *
+		 * Hence: backoff() callers should always wake up the lockvoker at the
+		 * front of their queue before leaving.
+		 *
+		 * The exception is if the item at the front is the backoff() caller
+		 * itself. This can happen if, for example, a multi-locking lockvoker
+		 * is backing off of a qutex within which it's the only waiter.
+		 */
+		if (nQItems > 1) {
+			wakeUp(newFront);
+		}
+	}
+
+	void release()
+	{
+		lock.acquire();
+
+		/* Get the saved iterator and use it to unregister.
+		 * Don't acquire lock because we already acquired it in this function.
+		 */
+		unregister(currOwner->serializedContinuation.requiredLocks
+			.getLockUsageDesc(*this).second, false);
+
+		currOwner.release();
+
+		/** EXPLANATION:
+		 * It would be nice to be able to optimize by only awakening if the
+		 * release()ing lockvoker was at the front of the qutexQ, but if we
+		 * don't unconditionally wakeup() the front item, we could get lost
+		 * wakeups. Consider:
+		 *
+		 *	Lv1 only has 1 requiredLock.
+		 *	Lv2 has 3 requiredLocks. One of its requiredLocks overlaps with
+		 *		Lv1's single requiredLock. So they both share a common lock.
+		 *	Lv3 currently owns Lv1 & Lv2's common requiredLock.
+		 *	Lv3 release()s that common lock.
+		 *	Lv1 happens to be next in queue after Lv3 unregisters itself.
+		 *	Lv3 wakes up Lv1.
+		 *	Just before Lv1 can acquire the common lock, Lv2 acquires it now,
+		 *		because it only needs to be in the top 66% to succeed.
+		 *	Lv1 checks the currOwner and sees that it's owned. Lv1 is now
+		 *		dequeued from its io_service. It won't be awakened until someone
+		 *		awakens it.
+		 *	Lv2 finishes its critical section and release()s the common lock.
+		 *	Lv2 was not at the front of the qutexQ, so it does NOT awaken the
+		 *		current item at the front.
+		 *
+		 * Thus, Lv1 never gets awakened again. The end.
+		 * This also means that no LockSet.size()==1 lockvoker will ever be able
+		 * to run again since they can only run if they are at the front of the
+		 * qutexQ.
+		 *
+		 * Therefore we must always awaken the front item when release()ing.
+		 */
+		LockerAndInvoker &front = queue.front();
+
+		lock.release();
+
+		wakeUp(front);
+	}
+
+public:
+	SpinLock					lock;
+	std::shared_ptr<LockerAndInvoker> currOwner;
+	LockerAndInvokerList		queue;
+};
+```
@@ -0,0 +1,59 @@
+# Spinqueueing: A new locking method that only blocks requests and not threads.
+
+The idea is that instead of using sleeplocks like mutexes, we instead only spin
+particular request objects by re-posting them to the queue.
+
+Particular requests may need a given shared resource. Instead of sleeping a
+whole thread while that particular request waits for the resource, we instead
+sleep the request itself by re-posting it into the thread's queue. This
+basically implements a kind of spinlock without busy-waiting. The underlying
+thread is never blocked unless it has no requests that can make forward
+progress.
+
+Forward progress through requests is only halted when an external resource is
+actually being waited on. Generally this will be an actual hardware event that
+is being waited on. No software bottlenecks will be slept on.
+
+All locks in the program are simple spinlocks, but the algorithm to spin on them
+is:
+
+## Each async call has a "locker and invoker":
+
+int funcThatCallsAnAsyncFunc(...)
+{
+	// Do preparatory stuff ...
+
+
+	// Post the lockvoker to the target thread.
+	targetThread.io_service.post(
+		[targetThread, /* args to asyncOperationReq captured here */]()
+		{
+			int nAcquired;
+			for (nAcquired=0; nAcquired<nLocksRequired; nAcquired++)
+			{
+				if (!requiredLocks->tryAcquire()) {
+					break;
+				}
+			}
+			if (nAcquired < nLocksRequired)
+			{
+				for (int i=0; i<nAcquired; i++) {
+					requiredLocks->release();
+				}
+
+				/* Unsure how to recapture the lambda object and re-enqueue it.
+				 * Dunno if that's even possible. But this is the essence of the
+				 * queue-spin system. We re-enqueue the lockvoker until it
+				 * gets all locks required. Then it will invoke the async
+				 * frontend.
+				 */
+				targetThead.io_service.post(this?);
+			}
+
+			managerObject.asyncOperationReq(
+				/* args to asyncOperationReq passed here */);
+		}
+	);
+}
+
+## Idk how to encapsulate lockvokers into a terse, reusable idiom.
@@ -0,0 +1,294 @@
+# Device Attachment Pipeline (DAP) Specification DSL: attaching sensors and actuators to SMO.
+
+## DAP Specs vs DA Specs:
+
+**DAP (Device Attachment Pipeline) specs** are human-readable DSL
+specifications that describe a pipeline of steps to connect a particular
+device role on a particular device-identifier to Salmanoff. This document
+describes the DAP specification format.
+
+**DA (Device Attachment) specs** are compiled binary structs used internally
+by SMO after DAP specs have been parsed into binary format. DA specs are the
+internal representation that the system actually uses.
+
+**Multiple Input Formats**: DAP specs can be parsed from multiple
+human-readable formats. For example, we intend to eventually extend ROS's
+URDF XML format to specify device attachment specs (URDFDA specs), which
+would also get compiled into the same DA spec binary format.
+
+## Attaching sensors:
+
+Sensors are input devices to Salmanoff. Salmanoff will perceive them as
+perceptual inputs -- like your own sense organs. For example, if you attach a
+camera as a sensor, salmanoff will experience it in the same way that you
+experience the visual sense data from your eyes.
+
+## QualeIface (Quale Interface):
+
+A QualeIface is a **Quale** **I**nter**face** library that connects to a
+particular stim buffer and allows the mind to process the stim features
+presented in the device's stim buffers. QualeIface libraries replace the
+previous notion of an implexor. They provide the interface between raw device
+data and the mind's processing capabilities.
+
+## Device Attachment Pipeline (DAP) Specification Format:
+
+The general format of a DAP specification is:
+```
+sensor-type|dev-identifier|quale-iface-api(quale-iface-api-params)|stim-buff-api(api-params)|provider(provider-params)|dev-selector
+```
+
+* `sensor-type` is always either '`+idev`' (interoceptor), '`+edev`'
+  (extrospector), or '`+adev`' (actuator).
+* `dev-identifier` is a user-defined name for this specific device instance.
+  This represents a logical device that can be accessed through multiple
+  providers and may expose multiple stim features. In a sense it's like a
+  sense organ or sense modality.
+* `quale-iface-api` is the name of the QualeIface library that should be used to
+  process the data from the stim buffer. This replaces the previous implexor
+  concept. The `quale-iface-api-params` in parentheses may be omitted, in which
+  case the parentheses will be empty, but the parentheses must always be written out.
+* `stim-buff-api` is the interface that provides access to a specific stim
+  buffer from the device. A single device may have multiple stim buffers
+  (e.g., audio output, microphone input, different data streams). The
+  `api-params` in parentheses may be omitted, in which case the parentheses
+  will be empty, but the parentheses must always be written out.
+* `provider` may be a userspace daemon or an OS kernel that provides access to
+  the device's I/O functionality; and thereby allows the `stim-buff-api` to
+  construct and present a stim-buffer to Salmanoff. The `provider-params` in
+  parentheses may be omitted, in which case the parentheses will be empty, but
+  the parentheses must always be written out.
+* `dev-selector` is the idiosyncratic label/name used by the `provider` to
+  identify the specific device you want to attach via that `provider`.
+
+## `quale-iface-api-params`, `stim-buff-api-params` and `provider-params`:
+
+If there's more than one parameter item in a list of `quale-iface-api-params`,
+`stim-buff-api-params`, or `provider-params`, then the individual items in a
+list should be separated by the vertical-bar (pipe) character (`|`). E.g:
+```
+edev|soundcard0|audio-qualeiface(param1|param2)|alsa-audio(shmem|param2|param3)|alsa()|cardname
+```
+
+Each parameter must be in one of these forms:
+* key=value
+* key=
+* key
+
+### Important Note on `stim-buff-api-params`:
+
+The `stim-buff-api-params` should **never** include options related to the
+stim buffer's type or format. The `stim-buff-api` must read and infer such
+configuration details from the `quale-iface-api` portion of the DAP spec, and
+configure itself accordingly to enable connection by the specified
+quale-iface library in the way that it has been configured.
+
+`stim-buff-api-params` are for options that are:
+- Device-specific (not modality-wide)
+- Specific to this particular stim-feature as provided by this device
+- Configuration parameters needed by the stim-buff-api to properly interface
+  with the device
+
+Examples of appropriate `stim-buff-api-params`:
+- Buffer size settings
+- Device-specific communication parameters
+- Hardware-specific configuration options
+- Connection timeouts or retry settings
+
+Examples of **inappropriate** `stim-buff-api-params`:
+- Data format specifications (should be inferred from quale-iface-api)
+- Color space settings (should be determined by the quale-iface library)
+- Processing algorithm parameters (belong to the quale-iface library)
+
+## Logical View and Multiple Access Patterns:
+
+### Single Device, Multiple Providers:
+
+A single `dev-identifier` can unite several `dev-selectors` from multiple
+providers. For example, a sound card device `soundcard0` could be accessed
+through:
+
+* `ident: soundcard0, provider: alsa` - Provides access to the card via ALSA
+  API for audio output
+* `ident: soundcard0, provider: linux-driver-direct-file-ops` - Provides direct
+  connection to Linux driver via read/write posix FD calls for beeper sound
+  output
+* `ident: soundcard0, provider: alsa` - Provides access to the card via ALSA
+  for microphone input
+
+So a single physical device is accessed via multiple providers, each with
+different selectors.
+
+### Single Device, Same Provider, Different Stim-Buff-APIs:
+
+A device could have different `stim-buff-apis`, possibly provided by different
+shared libraries:
+
+* `ident: soundcard0, provider: alsa, stim-buff-api: alsa-audio` - For audio
+  output
+* `ident: soundcard0, provider: alsa, stim-buff-api: alsa-mic` - For microphone
+  input
+
+Different stim-buff-apis may be packaged into the same shared library, or
+multiple libraries may dlopen a common library behind the scenes.
+
+### Stim Features and Buffers:
+
+Logically, a `dev-identifier` represents a sense modality. Each device can
+export multiple stim features. For example, an eye can export:
+- Color data
+- Light intensity data
+- Thermal heat data
+- Pain input data
+
+Each stim feature is exposed as a stim buffer, provided by a `stim-buff-api`.
+Stim-buff-apis rely upon providers to implement the device-specific operations
+required to effectuate the stim-buff controls.
+
+## Examples:
+
+### To attach a particular window from a window manager:
+```
+edev|my-window|visual-qualeiface()|wayland()|wayland(server-socket)|window0
+```
+Connect to the Wayland server that's listening on `server-socket`, using the
+`wayland` stim-buff-api. Ask that Wayland server to give salmanoff read-access to all of
+the frames composited into the window buffer for `window0`. Use salmanoff's
+`visual-qualeiface` to process the visual data from that `window0`'s compositor buffer.
+
+### To attach a window manager's entire rendered desktop:
+```
+edev|my-desktop|visual-qualeiface()|wayland()|wayland(listen-socket)|all
+```
+In most cases, this is basically the same as attempting to attach all of the
+underlying GFX server's output.
+
+Connect to the Wayland server that's listening on `listen-socket`, using the
+`wayland` stim-buff-api. Ask that Wayland server to give salmanoff read-access to the
+entire compositor framebuffer. Use salmanoff's `visual-qualeiface` to process the visual data from
+that Wayland server's compositor buffer.
+
+### To attach all of an Xorg server's gfx output to all screens:
+```
+edev|my-xorg-display|visual-qualeiface()|x11()|xorg(listen-socket)|all
+```
+
+Connect to the Xorg server that's listening on `listen-socket`, using the `x11`
+stim-buff-api. Ask that Xorg server to let Salmanoff read out all of the frames written
+out to all screens. Use salmanoff's `visual-qualeiface` to process the visual data from the
+server's gfx framebuffer.
+
+In most cases, this is basically the same as attempting to attach all of the
+WM's output.
+
+* Implementation note:
+  https://stackoverflow.com/questions/33845447/how-do-i-talk-to-an-x-server-in-c-without-a-graphics-library
+  Seems relevant.
+
+### To attach all of an Xorg server's gfx output to a particular screen:
+```
+edev|my-screen|visual-qualeiface()|x11()|xorg(listen-socket)|:0
+```
+Connect to the Xorg server that's listening on `listen-socket`, using the `x11`
+stim-buff-api. Ask that Xorg server to let Salmanoff read out all of the frames written
+out to display `:0`. Use salmanoff's `visual-qualeiface` to process the visual data from display
+`:0`'s framebuffer.
+
+* Implementation note:
+  https://stackoverflow.com/questions/33845447/how-do-i-talk-to-an-x-server-in-c-without-a-graphics-library
+  Seems relevant.
+
+### To attach a camera device by connecting directly to its Linux driver:
+```
+edev|my-camera|visual-qualeiface()|v4l()|linux()|/dev/video0
+```
+We specify that we want to use the `linux` kernel's loaded driver to
+communicate with `/dev/video0`, via the `v4l` (Video4Linux) stim-buff-api. We want salmanoff
+to use the `visual-qualeiface` library to process the visual data from `/dev/video0`'s stim buffer.
+
+If `/dev/video0` is already in use by another process, this will likely fail.
+
+### To attach a microphone that's managed by an ALSA server:
+```
+edev|my-microphone|audio-qualeiface()|alsa-mic(shmem)|alsa()|cardname
+```
+
+Connect to the ALSA server via `shmem`, using the `alsa-mic` stim-buff-api. Request access to
+the microphone function of the sound card with the name `cardname`. Use the
+`audio-qualeiface` library to process the audio data from `cardname`'s microphone stim buffer.
+
+### To attach a thermal sensor managed by Linux:
+```
+idev|my-thermal|thermal-qualeiface()|thermal-zone()|linux()|/sys/class/thermal/thermal_zone0
+```
+
+Use the `thermal-zone` SysFS stim-buff-api provided by `linux` to connect to the sensor
+`/sys/class/thermal/thermal_zone0`. Use the `thermal-qualeiface` library to process the thermal data from
+`thermal_zone0`'s heat stim buffer.
+
+## Multiple Provider Examples:
+
+### Single Sound Card Device with Multiple Providers:
+
+The same physical sound card `soundcard0` can be accessed through different providers:
+
+```
+edev|soundcard0|audio-qualeiface()|alsa-audio()|alsa()|card0
+|| edev|soundcard0|audio-qualeiface()|direct-file-ops()|linux()|/dev/snd/pcmC0D0p
+|| idev|soundcard0|audio-qualeiface()|alsa-mic()|alsa()|card0
+```
+
+This shows:
+- `soundcard0` accessed via ALSA provider for audio output (`alsa-audio` stim-buff-api)
+- `soundcard0` accessed via Linux provider for direct file operations (`direct-file-ops` stim-buff-api)  
+- `soundcard0` accessed via ALSA provider for microphone input (`alsa-mic` stim-buff-api)
+
+### Single Camera Device with Multiple Stim-Buff-APIs:
+
+A camera device `camera0` might expose different data streams:
+
+```
+edev|camera0|visual-qualeiface()|v4l-rgb()|linux()|/dev/video0
+|| edev|camera0|visual-qualeiface()|v4l-yuv()|linux()|/dev/video0
+|| idev|camera0|thermal-qualeiface()|v4l-thermal()|linux()|/dev/video0
+```
+
+This shows the same camera device providing:
+- RGB color data via `v4l-rgb` stim-buff-api
+- YUV color data via `v4l-yuv` stim-buff-api
+- Thermal data via `v4l-thermal` stim-buff-api
+
+## Attaching actuators:
+
+Actuators are Salmanoff's way of enacting changes in the external world.
+They're like your limbs, or your mouth. Actuators enable salmanoff to write
+outputs to the world outside.
+
+### Wilzors:
+
+Actuator devices are analogous to your body's limbs. Salmanoff controls these
+by using `wilzor` algorithms. Wilzor is a contraction of **Wil**lpower
+Actuat**Or** but with a 'Z' in the middle to make it sound cooler. Different
+types of devices will require different wilzor algorithms. You need to know
+what type of wilzor algorithm needs to be used to enable salmanoff to control
+your actuator device.
+
+The general format for an actuator's device attachment specification follows the same pattern:
+```
+adev|dev-identifier|wilzor-algorithm(quale-iface-api-params)|stim-buff-api(api-params)|provider(provider-params)|dev-selector
+```
+
+Where `wilzor-algorithm` is the specific wilzor algorithm needed to control the actuator device.
+
+## Device Attachment Pipeline (DAP) specification files:
+
+Inside of a DAP specification file, you can list any number of
+DAP specifications.
+Separate individual DAP specifications with two consecutive vertical-bar (pipe)
+characters (`||`),
+like this:
+```
+edev|my-window|visual-qualeiface()|wayland()|wayland(server-socket)|window0
+|| edev|my-xorg-display|visual-qualeiface()|x11()|xorg(listen-socket)|all
+|| idev|my-thermal|thermal-qualeiface()|thermal-zone()|linux()|/sys/class/thermal/thermal_zone0
+```
@@ -1,168 +0,0 @@
-# Device Attachment Specification DSL: attaching sensors and actuators to SMO.
-
-## Attaching sensors:
-
-Sensors are input devices to Salmanoff. Salmanoff will perceive them as
-perceptual inputs -- like your own sense organs. For example, if you attach a
-camera as a sensor, salmanoff will experience it in the same way that you
-experience the visual sense data from your eyes.
-
-## Implexors:
-
-An implexor is an **Imp**licit **Ex**istent isolat**Or** algorithm. It's
-basically what conventional ML/LLM/ANN developers call an ROI ("Region of
-Interest") extraction algorithm. An Implex algorithm is used to scan a frame
-of input sensor data and detect objects and patterns within it.
-
-## Sensor device attachment specification:
-
-The general format of a device attachment specification for a sensor is:
-```
-sensor-type|dev-identifier
-  |implexor|api(api-params)|provider(provider-params)|deviceselector
-```
-
-* `sensor-type` is always either '`+idev`' (interoceptor), '`+edev`'
-  (extrospector), or '`+adev`' (actuator).
-* `dev-identifier` is a user-defined name for this specific device instance.
-* `implexor` is the name of the implexor algorithm that should be used with
-  the data that is provided by the `provider` via the `api`.
-* `api` is the interface that the provider uses to export perceptual data for
-  salmanoff to read. Salmanoff will run the `implexor` algorithm on the data
-  from this `api`. The `api-param` in parentheses may be omitted, in which
-  case the parentheses will be empty, but the parentheses must always be
-  written out.
-* `provider` may be a userspace daemon or an OS kernel that provides perceptual
-  data via the `api`. The `provider-params` in parentheses may be omitted, in
-  which case the parenthesis will be empty, but the parentheses must always be
-  written out.
-* `device selector` is the idiosyncratic label/name used by the `provider` to
-  identify the specific device you want to attach via that `provider`.
-
-## `API-params` and `provider-params`:
-
-If there's more than one parameter item in a list of `api-params` or
-`provider-params`, then the individual items in a list of `api-param` or
-`provider-params` should be separated by the h-bar character (`|`). E.g:
-```
-+edev|audio-implexor|alsa(shmem|param2|param3)|alsa()|cardname
-```
-
-Each parameter must be in one of these forms:
-* key=value
-* key=
-* key
-
-Some examples follow:
-
-### To attach a particular window from a window manager:
-```
-+edev|my-window|visual-implexor|wayland()|wayland(server-socket)|window0
-```
-Connect to the Wayland server that's listening on `server-socket`, using the
-`wayland` api. Ask that Wayland server to give salmanoff read-access to all of
-the frames composited into the window buffer for `window0`. Use salmanoff's
-`visual-implexor` to implex from that `window0`'s compositor data.
-
-### To attach a window manager's entire rendered desktop:
-```
-+edev|my-desktop|visual-implexor|wayland()|wayland(listen-socket)|all
-```
-In most cases, this is basically the same as attempting to attach all of the
-underlying GFX server's output.
-
-Connect to the Wayland server that's listening on `listen-socket`, using the
-`wayland` api. Ask that Wayland server to give salmanoff read-access to the
-entire compositor framebuffer. Use salmanoff's `visual-implexor` to implex from
-that Wayland server's compositor data.
-
-### To attach all of an Xorg server's gfx output to all screens:
-```
-+edev|my-xorg-display|visual-implexor|x11()|xorg(listen-socket)|all
-```
-
-Connect to the Xorg server that's listening on `listen-socket`, using the `x11`
-api. Ask that Xorg server to let Salmanoff read out all of the frames written
-out to all screens. Use salmanoff's `visual-implexor` to implex from the
-server's gfx framebuffer data.
-
-In most cases, this is basically the same as attempting to attach all of the
-WM's output.
-
-* Implementation note:
-  https://stackoverflow.com/questions/33845447/how-do-i-talk-to-an-x-server-in-c-without-a-graphics-library
-  Seems relevant.
-
-### To attach all of an Xorg server's gfx output to a particular screen:
-```
-+edev|my-screen|visual-implexor|x11()|xorg(listen-socket)|:0
-```
-Connect to the Xorg server that's listening on `listen-socket`, using the `x11`
-api. Ask that Xorg server to let Salmanoff read out all of the frames written
-out to display `:0`. Use salmanoff's `visual-implexor` to implex from display
-`:0`'s framebuffer data.
-
-* Implementation note:
-  https://stackoverflow.com/questions/33845447/how-do-i-talk-to-an-x-server-in-c-without-a-graphics-library
-  Seems relevant.
-
-### To attach a camera device by connecting directly to its Linux driver:
-```
-+edev|my-camera|visual-implexor|v4l()|linux()|/dev/video0
-```
-We specify that we want to use the `linux` kernel's loaded driver to connect
-to communicate with `/dev/video0`, via the `Video4Linux` API. We want salmanoff
-to use the `visual-implexor` algorithm to implex from `/dev/video0`'s data.
-
-If `/dev/video0` is already consumed by another process, this may likely fail.
-
-### To attach a microphone that's managed by ALSA server:
-```
-+edev|my-microphone|audio-implexor|alsa(shmem)|alsa()|cardname
-```
-
-Connect to the ALSA server via `shmem`, using the `alsa` API. Request access to
-the microphone function of the sound card with the name `cardname`. Use the
-`audio-implexor` algorithm to implex from `cardname`'s microphone data.
-
-### To attach a thermal sensor managed by Linux:
-```
-+idev|my-thermal|thermal-implexor|thermal-zone()|linux()|/sys/class/thermal_zone0
-```
-
-Use the `thermal-zone` SysFS API provided by `linux` to connect to the sensor
-`/sys/class/thermal_zone0`. Use the `thermal-implexor` implexor to implex from
-`thermal_zone0`'s heat data.
-
-## Attaching actuators:
-
-Actuators are Salmanoff's way of enacting changes in the external world.
-They're like your libs, or your mouth. Actuators enable salmanoff to write
-outputs to the world outside.
-
-### Wilzors:
-
-Actuator devices are analogous to your body's limbs. Salmanoff controls these
-by using `wilzor` algorithms. Wilzor is a contraction of **Wil**lpower
-Actuat**Or** but with a 'Z' in the middle to make it sound cooler. Different
-types of devices will require different wilzor algorithms. You need to know
-what type of wilzor algorithm needs to be used to enable salmanoff to control
-your actuator device.
-
-The general format for an actuator's device attachment specification is:
-```
-WIP: TBD.
-```
-
-## Device attachment specification files:
-
-Inside of a device attachment specification file, you can list any number of
-device attachment specifications.
-Separate individual device attachment specifications with two consecutive h-bar
-characters (`||`),
-like this:
-```
-+edev|my-window|visual-implexor|wayland()|wayland(server-socket)|window0
-|| +edev|my-xorg-display|visual-implexor|x11()|xorg(listen-socket)|all
-|| +idev|my-thermal|thermal-implexor|thermal-zone()|linux()|/sys/class/thermal_zone0
-```
@@ -0,0 +1,259 @@
+# LivoxGen1Lidar Device Attachment Pipeline (DAP) Specification
+
+## Overview
+
+The LivoxGen1Lidar DAP specification describes how to attach to Livox Gen1
+LiDAR devices and access their various data streams. The Livox Gen1 LiDAR
+presents multiple stim features, each with its own dedicated stim-buff-api.
+
+## Stim-Buff-API Structure
+
+The LivoxGen1Lidar DAP uses multiple stim-buff-api names, one for each stim feature it presents:
+
+* `livoxGen1-pcloud` - Point cloud coordinate data
+* `livoxGen1-pcloudIntensity` - Point cloud intensity/reflectivity data  
+* `livoxGen1-gyro` - Gyroscope data from internal IMU
+* `livoxGen1-accel` - Accelerometer data from internal IMU
+
+Each stim-buff-api is designed to work with specific quale-iface libraries that understand the data format and processing requirements for that particular stim feature.
+
+## DAP Specifications
+
+### 1. Point Cloud Intensity Data Device (Interoceptor)
+
+**Purpose**: Provides light intensity/reflectivity data from the LiDAR point cloud.
+
+**Syntax**:
+```
+idev | avia0 | pcloudIntensity | livoxGen1-pcloudIntensity(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Stim-Buff-API**: `livoxGen1-pcloudIntensity`
+**Quale-Iface-API**: `pcloudIntensity` - Processes intensity/reflectivity data from point clouds
+
+### 2. Point Cloud Ambience Data Device (Interoceptor)
+
+**Purpose**: Provides ambience data from the LiDAR point cloud, counting high-intensity points per slot.
+
+**Syntax**:
+```
+idev | avia0 | pcloudAmbience | livoxGen1-pcloud(high-val=120) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Stim-Buff-API**: `livoxGen1-pcloud`
+**Quale-Iface-API**: `pcloudAmbience` - Counts points with intensity >= threshold per slot
+
+**Ambience High Value Parameter** (for pcloudAmbience quale-iface-api):
+- **Parameter names**: `high-value` or `high-val` (synonyms)
+- **Purpose**: Threshold value for counting high-intensity points in the ambience buffer
+- **Default value**: 116
+- **Usage**: Points with intensity >= the configured `high-val` threshold are counted in the ambience buffer per slot
+- **Configuration**: Specified in `stim-buff-api-params` when using `pcloudAmbience` quale interface
+- **Example**: `high-val=120` or `high-value=120`
+
+### 3. Point Cloud Coordinate Data Device (Extrospector)
+
+**Purpose**: Provides spatial coordinate data from the LiDAR point cloud.
+
+**Syntax**:
+```
+edev | avia0 | pcloud(format=xyz) | livoxGen1-pcloud(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Example with n-dgrams-per-frame parameter**:
+```
+edev | avia0 | pcloud(format=xyz) | livoxGen1-pcloud(data-rate-hz=10,n-dgrams-per-frame=84) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Alternative Format Examples**:
+```
+edev | avia0 | pcloud(format=spherical) | livoxGen1-pcloud(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+edev | avia0 | pcloud(format=spherical-cartesian) | livoxGen1-pcloud(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+edev | avia0 | pcloud(format=dual-cartesian) | livoxGen1-pcloud(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+edev | avia0 | pcloud(format=dual-spherical) | livoxGen1-pcloud(data-rate-hz=10) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Stim-Buff-API**: `livoxGen1-pcloud`
+**Quale-Iface-API**: `pcloud` - Processes spatial coordinate data from point clouds
+
+**Format Parameter Values** (for pcloud quale-iface-api):
+- `xyz`: Standard Cartesian coordinates (X, Y, Z)
+- `spherical`: Raw spherical coordinates
+- `spherical-cartesian`: Spherical coordinates converted to Cartesian
+- `dual-cartesian`: Dual Cartesian coordinate system
+- `dual-spherical`: Dual spherical coordinate system
+
+**Alternative Format Parameter Names** (synonymous):
+- `format` or `fmt`
+
+### 4. IMU Gyroscope Data Device (Interoceptor)
+
+**Purpose**: Provides gyroscope data from the LiDAR's internal IMU.
+
+**Syntax**:
+```
+idev | avia0 | gyro | livoxGen1-gyro(data-rate-hz=100) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Stim-Buff-API**: `livoxGen1-gyro`
+**Quale-Iface-API**: `gyro` - Processes gyroscope angular velocity data
+
+### 5. IMU Accelerometer Data Device (Interoceptor)
+
+**Purpose**: Provides accelerometer data from the LiDAR's internal IMU.
+
+**Syntax**:
+```
+idev | avia0 | accel | livoxGen1-accel(data-rate-hz=100) | livoxProto1(command-timeout-ms=1000,retry-delay-ms=3000,smo-ip=192.168.1.50,smo-subnet-nbits=24) | 3JEDK380010Z39
+```
+
+**Stim-Buff-API**: `livoxGen1-accel`
+**Quale-Iface-API**: `accel` - Processes accelerometer linear acceleration data
+
+## Provider Parameters
+
+### livoxProto1 Provider
+
+The `livoxProto1` provider accepts the following parameters:
+
+**command-timeout-ms** / **cmd-timeout-ms** (optional, synonyms):
+- Specifies the timeout for command operations when communicating with devices
+- Value: Integer number of milliseconds
+- Example: `command-timeout-ms=1000` or `cmd-timeout-ms=1000` (1 second timeout)
+- Default: 1000ms if not specified
+
+**retry-delay-ms** (optional):
+- Specifies how long to wait for broadcast messages to arrive after attempting an initial direct connection
+- Value: Integer number of milliseconds
+- Example: `retry-delay-ms=3000` (wait 3 seconds)
+- Default: 3000ms if not specified
+
+**subnet** (optional):
+- Specifies the IP subnet for device IP address calculation
+- Value: IP address in the form X.X.0.0 where non-subnet bits must be 0
+- Example: `subnet=10.42.0.0` (use 10.42.x.x subnet)
+- Default: 0.0.0.0 (use default 192.168.1.x subnet)
+
+**data-port** (optional):
+- Specifies the UDP port for receiving point cloud data from the device
+- Value: Integer port number
+- Example: `data-port=56000`
+- Default: 56000 if not specified
+
+**cmd-port** (optional):
+- Specifies the UDP port for receiving command responses from the device
+- Value: Integer port number
+- Example: `cmd-port=56001`
+- Default: 56001 if not specified
+
+**imu-port** (optional):
+- Specifies the UDP port for receiving IMU data from the device
+- Value: Integer port number
+- Example: `imu-port=56002`
+- Default: 56002 if not specified
+
+## Parameter Summary
+
+### Stim-Buff-API Names
+
+| Stim Feature | Stim-Buff-API | Quale-Iface-API | Description |
+|--------------|---------------|----------------|-------------|
+| Point Cloud Intensity | `livoxGen1-pcloudIntensity` | `pcloudIntensity` | Light intensity/reflectivity data |
+| Point Cloud Ambience | `livoxGen1-pcloud` | `pcloudAmbience` | High-intensity point count per slot |
+| Point Cloud Coordinates | `livoxGen1-pcloud` | `pcloud` | Spatial coordinate data |
+| Gyroscope | `livoxGen1-gyro` | `gyro` | Angular velocity measurements |
+| Accelerometer | `livoxGen1-accel` | `accel` | Linear acceleration measurements |
+
+### Stim-Buff-API Parameters
+
+Each stim-buff-api accepts device-specific parameters:
+
+| Parameter | Description | Default | Example |
+|-----------|-------------|---------|---------|
+| `data-rate-hz` | Data sampling rate in Hz | - | `data-rate-hz=10` |
+| `n-dgrams-per-frame` / `num-dgrams-per-frame` | Number of UDP datagrams per staging buffer frame | 84 | `n-dgrams-per-frame=84` or `num-dgrams-per-frame=84` |
+| `high-value` / `high-val` | Threshold for counting high-intensity points in ambience buffer (for `pcloudAmbience` quale-iface-api) | 116 | `high-val=120` or `high-value=120` |
+
+### Quale-Iface-API Parameters
+
+The `pcloud` quale-iface-api accepts format parameters:
+
+| Format | Description |
+|--------|-------------|
+| `xyz` | Standard Cartesian coordinates (X, Y, Z) |
+| `spherical` | Raw spherical coordinates (range, azimuth, elevation) |
+| `spherical-cartesian` | Spherical coordinates converted to Cartesian |
+| `dual-cartesian` | Dual Cartesian coordinate system |
+| `dual-spherical` | Dual spherical coordinate system |
+
+The `pcloudAmbience` quale-iface-api uses the `high-value` / `high-val` parameter (documented in Stim-Buff-API Parameters above) to determine the intensity threshold for counting high-intensity points per slot.
+
+## Device Discovery and Connection
+
+The specification uses a retry-based connection strategy with two different approaches:
+
+### Connection Methods
+
+**1. Broadcast-Based Connection (connectToKnownDeviceReq)**
+- Uses device IP addresses discovered from broadcast advertisements
+- **smo-ip parameter**: Optional - if omitted, driver auto-detects the appropriate interface
+- **smo-subnet-nbits parameter**: Optional - used for validation if smo-ip is provided
+- **When to use**: When devices are actively broadcasting their presence
+
+**2. Heuristic Connection (connectByDeviceIdentifierReq)**
+- Generates device IP addresses from serial numbers using network prefix
+- **smo-ip parameter**: **Required** - needed to determine network prefix for IP generation
+- **smo-subnet-nbits parameter**: **Required** - needed to calculate valid device IP addresses
+- **When to use**: When devices are not broadcasting or for initial setup
+
+### Connection Strategy
+
+1. **Initial Check**: Check if device is already known from broadcasts
+2. **Direct Connect**: Attempt direct connection based on calculated IP address
+3. **Retry Wait**: If direct connect fails, wait for `retry-delay-ms` for broadcast messages
+4. **Final Check**: Check known devices again after retry delay
+5. **Report Result**: Success or failure based on final check
+
+## Data Formats
+
+### Point Cloud Coordinate Formats
+
+1. **XYZ Format**: Standard 3D Cartesian coordinates
+   - X, Y, Z in meters
+   - Standard coordinate system orientation
+
+2. **Spherical Format**: Raw spherical coordinates
+   - Range (distance) in meters
+   - Azimuth angle in degrees/radians
+   - Elevation angle in degrees/radians
+
+3. **Spherical-Cartesian Format**: Spherical coordinates converted to Cartesian
+   - Range, azimuth, elevation converted to X, Y, Z
+   - Maintains spherical measurement precision
+
+4. **Dual Formats**: Support for dual-coordinate systems
+   - Useful for devices with multiple measurement modes
+   - Provides redundancy and validation capabilities
+
+### Intensity Data
+
+- Reflectivity values typically in the range 0-255
+- Normalized intensity measurements
+- Calibrated for material reflectivity analysis
+
+### IMU Data
+
+- **Gyroscope**: Angular velocity measurements (rad/s)
+- **Accelerometer**: Linear acceleration measurements (m/s²)
+- Timestamped data synchronized with point cloud measurements
+
+## Error Handling
+
+The specification includes comprehensive error handling for:
+
+- Network connectivity issues
+- Device communication timeouts
+- Invalid coordinate format requests
+- IMU data stream interruptions
+- Device discovery failures
+- Connection retry timeouts
@@ -0,0 +1,16 @@
+This guy talks about getting it to work using a fake transform:
+
+https://stackoverflow.com/questions/52420672/ros-rviz-how-to-visualize-a-point-cloud-that-doesnt-have-a-fixed-frame-transfo
+
+This thread contains some info about what a transform is:
+
+https://answers.ros.org/question/328839/
+
+Somewhat useful troubleshooter:
+
+https://www.youtube.com/watch?v=b9YZITmCWe4
+
+Excellent, full-featured explanation:
+
+https://www.youtube.com/watch?v=QyvHhY4Y_Y8
+
@@ -0,0 +1,146 @@
+# Quale Interface APIs (QualeIface APIs)
+
+## Overview
+
+QualeIface APIs are libraries that connect to particular stim buffers and allow the mind to process the stim features presented in the device's stim buffers. They provide the interface between raw device data and the mind's processing capabilities.
+
+## Universally Understood Parameters
+
+The following parameters are universally understood across all QualeIface API implementations.
+
+### `history-buffer-duration-ms` / `hist-buff-duration-ms` / `histbuff-duration-ms` / `histbuff-ms`
+
+**Synonyms:**
+- `history-buffer-duration-ms`
+- `hist-buff-duration-ms`
+- `histbuff-duration-ms`
+- `histbuff-ms`
+
+**Description:**
+This parameter determines how long the history of the particular StimBuff being attached to the DAP spec's device role will be. The value is specified in milliseconds and determines the duration of historical data that will be maintained in the stimulus buffer.
+
+**Specification:**
+- The parameter is specified as part of the `quale-iface-api-params` in the DAP specification
+- The value is an integer representing milliseconds
+- If multiple synonyms are specified, the lattermost (last encountered) synonym takes precedence
+- If not specified, a default value of 30000ms (30 seconds) is used
+
+**Example:**
+```
+edev|my-device|visual-qualeiface(histbuff-ms=60000)|v4l()|linux()|/dev/video0
+```
+
+This example sets the history buffer duration to 60000ms (60 seconds).
+
+**Note:**
+This parameter is specific to each stimbuff/deviceRole combination. Different device roles can have different history buffer durations based on their requirements.
+
+## Overview
+
+QualeIface APIs are interface libraries that connect to particular stim buffers and allow the mind to process the stim features presented in the device's stim buffers. They provide the interface between raw device data and the mind's processing capabilities.
+
+## Universally Understood QualeIface API Parameters
+
+This document describes quale-iface-api-params that are universally understood across all QualeIface implementations.
+
+### history-buffer-duration-ms / hist-buff-duration-ms / histbuff-duration-ms / histbuff-ms
+
+**Purpose:** Determines how long the history of the particular StimBuff being attached to the DAP spec's device role will be.
+
+**Synonyms:**
+- `history-buffer-duration-ms` (full canonical name)
+- `hist-buff-duration-ms` (abbreviated)
+- `histbuff-duration-ms` (shortened)
+- `histbuff-ms` (shortest)
+
+**Type:** Integer (milliseconds)
+
+**Scope:** Specific to each stimbuff/deviceRole. Each device attachment can specify its own history buffer duration independently.
+
+**Default:** If not specified, the default value is 30000ms (30 seconds).
+
+**Usage:** The value specifies the duration in milliseconds for which stimulus frames will be retained in the history buffer. This affects how many slots are allocated in the ring buffer: `nSlots = histbuffMs / CONFIG_STIMBUFF_FRAME_PERIOD_MS`.
+
+**Example:**
+```
+edev|my-camera|visual-qualeiface(histbuff-ms=60000)|v4l()|linux()|/dev/video0
+```
+
+This example sets a 60-second history buffer duration for the visual qualeiface processing the camera's visual data.
+
+**Notes:**
+- If multiple synonyms are specified in the same parameter list, the lattermost one takes precedence
+- The parameter is parsed from the `quale-iface-api-params` section of the DAP specification
+- This parameter is specific to each device attachment, allowing different devices to have different history durations
+
+
+This document describes universally understood quale-iface-api-params that can be used across different QualeIface implementations.
+
+## history-buffer-duration-ms
+
+### Synonyms
+The `history-buffer-duration-ms` parameter can be specified using any of the following names:
+- `history-buffer-duration-ms` (full form)
+- `hist-buff-duration-ms` (abbreviated)
+- `histbuff-duration-ms` (abbreviated, with `histbuff` written as one word)
+- `histbuff-ms` (shortest form)
+
+### Description
+The `history-buffer-duration-ms` parameter determines how long the history of the particular StimBuff being attached to the DAP spec's device role will be. This value specifies the duration in milliseconds for which stimulus frame history will be maintained in the buffer.
+
+### Usage
+This parameter is **specific to each stimbuff/deviceRole**. Each device attachment can have its own history buffer duration, allowing fine-grained control over memory usage and history retention for different sensor types.
+
+### Example
+```
+edev|my-camera|visual-qualeiface(histbuff-ms=30000)|v4l()|linux()|/dev/video0
+```
+
+This example sets a 30-second history buffer for the camera device's stimulus buffer.
+
+### Default Value
+If not specified, the default value is **30000 milliseconds (30 seconds)**.
+
+### Notes
+- The parameter value should be specified as an integer representing milliseconds
+- Later synonyms in the parameter list will override earlier ones if multiple are specified
+- The actual number of buffer slots allocated will be calculated based on this duration divided by the frame period (CONFIG_STIMBUFF_FRAME_PERIOD_MS)
+
+
+This document describes universally understood parameters that can be used in quale-iface-api-params for device attachment specifications.
+
+## history-buffer-duration-ms
+
+**Synonyms:**
+- `history-buffer-duration-ms`
+- `hist-buff-duration-ms`
+- `histbuff-duration-ms`
+- `histbuff-ms`
+
+**Description:**
+
+The `history-buffer-duration-ms` parameter determines how long the history of the particular StimBuff being attached to the DAP spec's device role will be. This parameter is specific to each stimbuff/deviceRole combination.
+
+**Usage:**
+
+This parameter specifies the duration in milliseconds for which historical stimulus frame data will be retained in the buffer. The value determines the number of frames that can be stored, based on the frame period configured for the stimulus buffer.
+
+**Example:**
+
+```
+edev|avia0|structural-qualeiface(histbuff-ms=60000)|livoxGen1()|livoxProto1()|3JEDK380010Z39
+```
+
+This example sets the history buffer duration to 60000ms (60 seconds) for the avia0 device.
+
+**Default Value:**
+
+If not specified, the default value is 30000ms (30 seconds).
+
+**Notes:**
+
+- The parameter value should be specified in milliseconds
+- Multiple synonyms can be used, with later synonyms in the parameter list taking precedence
+- This parameter is parsed from the quale-iface-api-params, not from stim-buff-api-params or provider-params
+- The actual number of buffer slots is calculated as: `histbuffMs / CONFIG_STIMBUFF_FRAME_PERIOD_MS`
+
@@ -0,0 +1,122 @@
+# The reason why Rusticl behaves so weirdly with USE_HOST_PTR
+
+```
+[18:21] == rusticluser [~oftc-webi@2803:1500:c00:eb3:c450:9864:8f21:f2fb] has joined #rusticl
+[18:22] <rusticluser> Hey guys, I have questions about the implementation of clEnqueueMapBuffer/clEnqueueUnmapMemObject in Rusticl.
+[18:22] <rusticluser> This webpage says I should ping karolherbst
+[18:22] <rusticluser> https://docs.mesa3d.org/rusticl.html
+[18:23] <rusticluser> I am finding some very odd behaviour on the Raspberry Pi 5, when using the v3d GPU via Rusticl
+[18:24] <rusticluser> (Gimme a bit to write up my questions)
+[18:25] == pbrobinson [~pbrobinso@2001:8b0:fb11:2681:e9:f8b:31b:f797] has joined #rusticl
+[18:29] <rusticluser> Here's a dump of the output from running `RUSTICL_ENABLE=v3d clinfo` on my Raspberry Pi 5: https://gist.github.com/latentPrion/9843ff5b98f21b20b9f6d5bce43006b3
+[18:30] <rusticluser> Of particular note is that it says that the V3D GPU has a unified memory architecture with the main ARM CPU complex:
+[18:30] <rusticluser> >   Unified memory for Host and Device              Yes
+[18:32] <rusticluser> Because all of my target platforms seem to have unified memory with the CL GPUs, I decided that I would aim to optimize my program by using CL_MEM_USE_HOST_PTR, and avoiding using clEnqueueRead/WriteBuffer. I have indeed got it working on both the RPi5 and on my x86 laptop, but some of the things that were required to get it working on the RPi5+Rusticl implementation are a bit odd, and I wanted to confirm whether these behaviours and apparent eccentricities are
+[18:32] <rusticluser> intentional
+[18:34] <rusticluser> Here is my code, for your perusal and reference.
+[18:34] <rusticluser> https://gist.github.com/latentPrion/d9fb3f0604a957d2055786a118072482
+[18:36] <rusticluser> So: the long and short of it is: I have an input buffer (called "assemblyBuffer") that was filled with data by io_uring. I create an openCL buffer for assemblyBuff, using CL_MEM_USE_HOST_PTR. I then want to pass this assemblyBuffer into an OpenCL kernel.
+[18:37] <rusticluser>  The OpenCL kernel doesn't see the data that was written into the buffer unless I use CL_MAP_WRITE_INVALIDATE. I can understand the reasoning behind this, if the reasoning is that the cache invalidation op is performed on the GPU side.
+[18:38] <rusticluser> That makes sense because the GPU's caches may hold stale data that prevent it from seeing the data I put into the HOST_PTR buffer. So the need to invalidate the GPU's caches makes perfect sense and I'm not complaining about this.
+[18:39] <rusticluser> It's the next bit that is a bit confusing to me, and which I suspect is a bug in RustIcl or the MESA driver behind it.
+[18:40] <rusticluser> I have a 2nd buffer, called the "collateBuffer", which is distinct from the "assemblyBuffer". I run a 2nd kernel after the first kernel, which takes the assemblyBuffer as input, and produces its output into the collationBuffer.
+[18:42] <rusticluser> Now, since the 1st kernel wrote its output data into the assemblyBuffer, this should mean that the GPU's caches should be up to date with the data that was just written into the assemblyBuffer by the 1st kernel -- because it was the GPU itself which wrote that data into the assembyBuffer
+[18:43] <rusticluser> Yet, for some reason, I'm still required to remap the assemblyBuffer with CL_MEM_WRITE_INVALIDATE_REGION when I want to run the 2nd kernel.
+[18:43] <rusticluser> 1. I have not modified the assemblyBuffer's data at all on the host CPU. The data in the assemblyBuffer is exactly what was written into it by the 1st kernel when it was running on the GPU.
+[18:44] <rusticluser> 2. The 2nd kernel doesn't write into, or modify the assemblyBuffer at all in any way. The 2nd kernel uses the assemblyBuffer as input *ONLY*.
+[18:44] <rusticluser>  
+[18:46] <rusticluser> I guess my question is: why am I required to first map and unmap the assemblyBuffer as CL_MAP_WRITE_INVALIDATE_REGION before the GPU can see the contents of the assemblyBuffer, even though the GPU itself just wrote that data into it, and the GPU's caches should be in sync with it?
+[18:47] <rusticluser> (You can see the remapping with CL_MAP_WRITE_INVALIDATE_REGION for the 2nd kernel's execution here: https://gist.github.com/latentPrion/d9fb3f0604a957d2055786a118072482#file-openclcollatingandmeshingengine-cpp-L343)
+[18:48] <rusticluser> Technically, I should be able to just map it as CL_MAP_WRITE without needing to specify INVALIDATE_REGION -- am I incorrect?
+[18:49] <rusticluser> Basically what you see in that pasted gist is what is required to get this to work on the RPi5, so any decisions you see in the code are constrained by either (1) Rusticl, (2) MESA drivers, (3) the RPi5 hardware
+[18:51] <rusticluser> I downloaded the Mesa source code and asked Cursor to scan it and find out what's going on (I don't know Rust, so I can't read the code myself very well) and Cursor says that there's an interediate layer of "shadow buffering" implemented by Rusticl between the host and GPU
+[18:52] <rusticluser> And that this intermediate shadow buffering layer is the source of the unexpected behaviours
+[19:11] <karolherbst> rusticluser: launching kernels on mapped buffers is undefined behavior
+[19:14] <karolherbst> though not sure if that's what you run into, just sounded like it
+[19:17] <rusticluser> karolherbst: Yea, but I don't keep them mapped -- notice that I map and then immediately unmap
+[19:18] <rusticluser> Literally: mapBuffer(); unmapBuffer() back to back lol -- good pointer though
+[19:18] <karolherbst> I'm a bit confused by the code, how do you verify that the GPU is or isn't reading the correct data?
+[19:18] <karolherbst> or do you access it through the host pointer directly?
+[19:19] <rusticluser> karolherbst: I check using printf() (OpenCL 1.2 extension) inside of the running kernel, and also I check the resulting output after the kernel has been executed
+[19:19] <karolherbst> ahh
+[19:19] <rusticluser> Would you like to see the kernels? They're just clutter for your headspace, but maybe they might give you some kind of information I don't know about
+[19:19] <karolherbst> USE_HOST_PTR is a bit weird, because it doesn't guarnatee coherency
+[19:20] <rusticluser> Yea -- I can understand that: the real thing that a developer who's using USE_HOST_PTR wants from the underlying implementation is something like this workflow:
+[19:22] <rusticluser> (1) clEnqueueMapBuffer(CL_MAP_WRITE) => /* (2) I write stuff into the buffer */ => (3) clEnqueueUnmapMemObject() /* At this point, during the unmap operation, the CL implementation is expected to write-back the host CPU's caches to main memory, and then invalidate the GPU's caches so that the GPU can see the writes that were stored to main memory
+[19:23] <rusticluser> And for the read-side, the workflow that the developer intuitively expects is:
+[19:25] <rusticluser> (1) clEnqueueMapBuffer(CL_MAP_READ) /* This mapping call should cause the GPU to write-back to main memory, and should cause the host CPU to invalidate its caches so it can see what was written by the GPU */ => (2) /* I read the stuff from the buffer */ => (3) clEnqueueUnmapMemObject() /* No special maintenance required here */
+[19:26] <karolherbst> right.. I think it's potentially also an issue with the rpi driver. It's not really well tested, so random bugs could always exist there. Might want to verify that your application behaves correctly on other GPUs
+[19:27] <rusticluser> Yea -- I only have this RPi5 as an ARM testbed, sadly. The other test machine I have is this shitty Intel Core I5 laptop with an Intel HD GPU. The Intel HD GPU doesn't require any mapping/unmapping of any kind -- the cache coherency domain seems to fully cover the GPU on the Intel laptop
+[19:28] <rusticluser> Idk, maybe it's a bug, maybe it's not -- I guess I was checking to see if the behaviour I was seeing was intentional and I just didn't properly understand the memory/execution model of OpenCL; or whether it's actually a bug somewhere in the underlying implementation's stack
+[19:28] <karolherbst> the intel is the only driver that ever added support for actually mapping host memory into the GPU when it's not page aligned
+[19:29] <rusticluser> Ah -- my HOST_PTRs are aligned to _SC_PAGE_SIZE
+[19:29] <karolherbst> I don't think the rpi driver supports mapping host memory at all
+[19:29] <rusticluser> :(
+[19:29] <karolherbst> yeah...
+[19:30] <karolherbst> not sure if it's because of missing kernel interfaces or what's the reason there
+[19:30] <rusticluser> How can I check and see? I have no understanding of GFX drivers and I hear they're a real domain-specific kind of mess to read;
+[19:30] <rusticluser> At minimum, which "module" in the mesa code provides the RPi5 opencl driver/support?
+[19:31] <karolherbst> well I know that it doesn't support it on the mesa side, but I haven't checked if there is in theory a kernel interface for it or not
+[19:31] <karolherbst> `src/gallium/drivers/v3d/` is the drive inside mesa
+[19:31] <rusticluser> *nod*, thanks
+[19:31] <rusticluser> Is this worth filing a ticket/issue for?
+[19:32] <karolherbst> _not_ sure. Maybe if there is a strong interest to also implement the GL/vulkan features allowing for mapping host memory
+[19:33] <rusticluser> Alright -- I'll just keep my eye on it and if it becomes an unmanageable problem, I'll file a ticket and probably also try to add the support myself
+[19:34] <rusticluser> These new LLMs really enable you to extend yourself into new domains and contribute to stuff you otherwise wouldn't have the time/insight to be able to, so if it really becomes unmanageable, I'll probably be able to just fix it and submit a patch
+[19:34] <karolherbst> though it should still work in theory, so not really sure what's going wrong there
+[19:34] <rusticluser> It should lol -- the purpose of the clEnqueueMap/Unmap calls isn't to actually "map" anything -- it's purely to manage the cache synchronization between the host CPU complex and the GPU
+[19:35] <karolherbst> but I'd verify if your application behaves as expected on other hardware/drivers as well, maybe even on discrete GPUs
+[19:35] <rusticluser> AFAICT, it's probably just a bug in the cache management
+[19:35] <rusticluser> It definitely won't work on a GPU that doesn't have shared memory because the design is explicitly for USE_HOST_PTR
+[19:36] <karolherbst> then it's broken also for shared memory systems
+[19:36] <rusticluser> Hmmm -- could you elaborate on that?
+[19:37] <karolherbst> USE_HOST_PTR doesn't really allow for different use csaes as it doesn't really gurantee anything except that the pointer returned by mapBuffer matches the host pointer
+[19:37] <karolherbst> aand that's all the additional guarantee it gives you
+[19:38] <karolherbst> you still have to use it as if it wouldn't be a host ptr allocation, because synchronization points are the same as with non host ptr allocations
+[19:38] <rusticluser> Yes, indeed: but it's also explicitly different from CL_MEM_ALLOC_HOST_PTR, I think? The difference is that CL_MEM_ALLOC_HOST_PTR is likely to be mapping in device MMIO registers
+[19:38] <karolherbst> alloc host ptr just means that the allocation is done in host memory instead of VRAM
+[19:38] <karolherbst> maybe
+[19:38] <karolherbst> it's just a hint
+[19:39] <karolherbst> like it uses GART infrastructure and the GPU just accesses memory over PCIe (if a discrete GPU)
+[19:39] <karolherbst> for unified memory GPU it shouldn't make any difference
+[19:39] <rusticluser> I'm sorry -- am I wrong? CL_MEM_ALLOC_HOST_PTR means only that the buffer returned will be *ACCESSIBLE* by the host. This means that the buffer could be MMIO mapped registers, or some other such memory range
+[19:40] <rusticluser> It doesn't actually mean that the buffer is allocated from host mem
+[19:40] <rusticluser> It just means that the buffer will be *ACCESSIBLE* from host mem, __POTENTIALLY__ without a copy
+[19:40] <karolherbst> it has nothing to do with access
+[19:41] <rusticluser> https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/clCreateBuffer.html:
+[19:41] <karolherbst> sure, but it means something else
+[19:41] <rusticluser> > This flag specifies that the application wants the OpenCL implementation to allocate memory from host accessible memory.  CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR are mutually exclusive.
+[19:41] <rusticluser> Ah ok lol
+[19:41] <karolherbst> like you can't access the memory allocation either way directly, because you have to map
+[19:42] <karolherbst> though CL_MEM_ALLOC_HOST_PTR is more of a "please don't use VRAM, so that reading out the memory on the host is quick"
+[19:42] <rusticluser> It seems like the reason why they say that ALLOC_HOST_PTR and USE_HOST_PTR are mutually exclusive is *precisely because* ALLOC_HOST_PTR is not guaranteed to be allocated within host memory lol
+[19:42] <karolherbst> well.. you have no control over what address the mapping will have
+[19:43] <karolherbst> USE_HOST_PTR already uses host memory, so alloc_host_ptr is meaningless
+[19:43] <rusticluser> I am fairly certain that MEM_ALLOC_HOST_PTR means, "You may use VRAM if you wish, but ensure that it's a portion of your internal VRAM that can be exposed and mapped as MMIO. You may also use host RAM if you wish -- both are fine"
+[19:43] <rusticluser> [19:43] <karolherbst> USE_HOST_PTR already uses host memory, so alloc_host_ptr is meaningless
+[19:43] <rusticluser> ^ Absolutely correct
+[19:43] <rusticluser> Wait whoa no
+[19:44] <karolherbst> VRAM can always be mapped into host memory, it's just slow
+[19:44] <karolherbst> and you have to fight with PCI bar sizes
+[19:44] <karolherbst> though you can also set different caching hints etc..
+[19:45] <rusticluser> When I say "VRAM" here, I was mimicking your language, but a more accurate term would be "device memory" because there's no guarantee that the OpenCL device is indeed a GPU, or that it exposes all of its global, local or private memory in an MMIO or host-accessible fashion lol
+[19:45] <rusticluser> Ok errm, I don't think arguing over this will go very far lol
+[19:46] <rusticluser> But I really appreciate your pointers -- I'll look for another test board
+[19:46] <rusticluser> Really appreciate your time -- I know this is a volunteer effort on your part
+[19:47] <fdobridge_> <leftmostcat> Heheh. Pointers.
+[19:47] == rusticluser [~oftc-webi@2803:1500:c00:eb3:c450:9864:8f21:f2fb]
+[19:47] ==  realname : OFTC WebIRC Client
+[19:47] ==  channels : #rusticl
+[19:47] ==  server   : weber.oftc.net [Newark, NJ, USA]
+[19:47] ==  realhost :  [ip: actually using host]
+[19:47] ==  idle     : 0 days 0 hours 1 minutes 20 seconds [connected: Wed Nov 12 18:21:37 2025]
+[19:47] == End of WHOIS
+[19:49] <karolherbst> yeah anyway.. on the rpi5 driver might as well not use use_host_ptr because rusticl will have to copy things around to fake host_ptr support anyway. So might as well then not use it. But I also wanted to implement more optimized map/unmap paths for single device context with unified memory, because atm it's asuming worst case and isn't really
+[19:49] <karolherbst> optimized very well anyway
+[19:49] <karolherbst> but those optimizations will also paper over correctness issues
+[19:51] <karolherbst> though I'm also not convinced that the emulation code is 100% correct...
+[19:52] <karolherbst> there _might_ be a bug if the mapping has different accesses, but I never found anything that ran into issues here
+[19:54] <karolherbst> you could run with `RUSTICL_DEBUG=memory` and see if the prints make any sense. It should tell when the memory content is migrated and moved around
+[20:00] <rusticluser> karolherbst: Ah that's awesome info, thanks
+[20:01] <rusticluser> It would be really useful to have an explicit confirmation of whether I'm actually getting zero-copy
+```
@@ -0,0 +1,58 @@
+#ifndef ASYNCHRONOUS_BRIDGE_H
+#define ASYNCHRONOUS_BRIDGE_H
+
+#include <boostAsioLinkageFix.h>
+#include <atomic>
+#include <boost/asio/io_service.hpp>
+
+namespace smo {
+
+class AsynchronousBridge
+{
+public:
+	AsynchronousBridge(boost::asio::io_service &io_service)
+	: isAsyncOperationComplete(false), io_service(io_service)
+	{}
+
+	/**	EXPLANATION:
+	 * Raises the completion flag and then post()s an empty handler.
+	 *
+	 * The empty post()ed message is necessary to ensure that the thread
+	 * that's waiting on the io_service is signaled to wake up and check
+	 * the io_service's queue (and, after that, the completion flag).
+	 */
+	void setAsyncOperationComplete(void)
+	{
+		isAsyncOperationComplete.store(true);
+		io_service.post([]{});
+	}
+
+	/**	EXPLANATION:
+	 * Runs io_service handlers one at a time until either the completion
+	 * flag has been raised or the io_service reports that it has stopped.
+	 * At least one run_one() call is always made before either condition
+	 * is tested.
+	 *
+	 * In the mrntt and mind thread loops we call checkException() after
+	 * run() returns, but we don't have to do that here because
+	 * setException() calls stop.
+	 *
+	 * So if an exception is set on our thread, we'll leave this loop due
+	 * to the stopped() test below, and that'll take us back out to the
+	 * main loop, where we'll catch the exception.
+	 */
+	void waitForAsyncOperationCompleteOrIoServiceStopped(void)
+	{
+		do
+		{
+			io_service.run_one();
+		} while (!isAsyncOperationComplete.load() && !io_service.stopped());
+	}
+
+	/// True when the io_service is currently in the stopped state.
+	bool exitedBecauseIoServiceStopped(void) const
+		{ return io_service.stopped(); }
+
+private:
+	/// Set by setAsyncOperationComplete(); polled by the wait loop.
+	std::atomic<bool> isAsyncOperationComplete;
+	/// The io_service whose handlers are pumped while waiting.
+	boost::asio::io_service &io_service;
+};
+
+} // namespace smo
+
+#endif // ASYNCHRONOUS_BRIDGE_H
@@ -0,0 +1,158 @@
+#ifndef ASYNCHRONOUS_CONTINUATION_H
+#define ASYNCHRONOUS_CONTINUATION_H
+
+#include <functional>
+#include <memory>
+#include <exception>
+#include <componentThread.h>
+#include <callback.h>
+#include <callableTracer.h>
+#include <asynchronousContinuationChainLink.h>
+
+
+namespace smo {
+
+/**
+ * AsynchronousContinuation - Template base class for async sequence management
+ *
+ * This template provides a common pattern for managing asynchronous operations
+ * that need to maintain object lifetime through a sequence of callbacks.
+ *
+ * The template parameter OriginalCbFnT represents the signature of the original
+ * callback that will be invoked when the async sequence completes.
+ */
+template <class OriginalCbFnT>
+class AsynchronousContinuation
+:	public AsynchronousContinuationChainLink
+{
+public:
+	explicit AsynchronousContinuation(Callback<OriginalCbFnT> originalCb)
+	: originalCallback(std::move(originalCb))
+	{}
+
+	/**		EXPLANATION:
+	 * Signature of one segment of an async sequence. Each numbered
+	 * segment keeps the continuation object alive by taking a copy of its
+	 * shared_ptr as its sole parameter.
+	 */
+	typedef void (SegmentFn)(
+		std::shared_ptr<AsynchronousContinuation<OriginalCbFnT>>
+			lifetimePreservingConveyance);
+
+	/**	EXPLANATION:
+	 * When an async callee detects an error which pertains to the data
+	 * given by the caller, it ought not to throw the exception on its own
+	 * stack. Instead, it should store the exception in the continuation
+	 * object and hand it back to the caller.
+	 *
+	 * The caller should then call checkException() to rethrow it on its
+	 * own stack.
+	 *
+	 * These macros should be used by the callee to bubble the exception to
+	 * the caller:
+	 *  - CALLEE_SETEXC stores exc_obj in (continuation)->exception.
+	 *  - CALLEE_SETEXC_CALLCB does the same and then calls
+	 *    (continuation)->callOriginalCb() with no arguments. This base
+	 *    class does not declare callOriginalCb(); the derived continuation
+	 *    classes in this file do.
+	 *  - CALLEE_SETEXC_CALLCB_RET does the same and then returns from the
+	 *    enclosing function, so it is only usable where a bare `return;`
+	 *    is valid.
+	 *
+	 * Being preprocessor macros, they are not scoped to this class even
+	 * though they are defined inside its body.
+	 */
+	#define CALLEE_SETEXC(continuation, type, exc_obj) \
+		(continuation)->exception = std::make_exception_ptr<type>(exc_obj)
+
+	#define CALLEE_SETEXC_CALLCB(continuation, type, exc_obj) \
+		do { \
+			CALLEE_SETEXC(continuation, type, exc_obj); \
+			(continuation)->callOriginalCb(); \
+		} while(0)
+
+	#define CALLEE_SETEXC_CALLCB_RET(continuation, type, exc_obj) \
+		do { \
+			CALLEE_SETEXC_CALLCB(continuation, type, exc_obj); \
+			return; \
+		} while(0)
+
+	// Call this in the caller to rethrow the stored exception, if any.
+	// Does nothing when no exception has been stored.
+	void checkException()
+	{
+		if (exception)
+			{ std::rethrow_exception(exception); }
+	}
+
+	// Implement the virtual method from AsynchronousContinuationChainLink:
+	// the caller's continuation is the one carried by the original callback.
+	virtual std::shared_ptr<AsynchronousContinuationChainLink>
+	getCallersContinuationShPtr() const override
+		{ return originalCallback.callerContinuation; }
+
+public:
+	// Callback (function + caller's continuation) supplied at construction.
+	Callback<OriginalCbFnT> originalCallback;
+	// Exception stored by the callee; null until one of the macros sets it.
+	std::exception_ptr exception;
+};
+
+/**
+ * NonPostedAsynchronousContinuation - For continuations that don't post
+ * callbacks
+ *
+ * Note: We intentionally do not create a
+ * LockedNonPostedAsynchronousContinuation because the only way to implement
+ * non-posted locking would be via busy-spinning or sleeplocks. This would
+ * eliminate the throughput advantage from our Qspinning mechanism, which
+ * relies on re-posting to the io_service queue when locks are unavailable.
+ */
+template <class OriginalCbFnT>
+class NonPostedAsynchronousContinuation
+:	public AsynchronousContinuation<OriginalCbFnT>
+{
+public:
+	/**
+	 * @brief Construct from the callback to invoke when the sequence ends
+	 *
+	 * originalCb is received by value and moved into the base class, so
+	 * the Callback (a shared_ptr plus the callback function object) is not
+	 * copied a second time on its way down.
+	 *
+	 * @param originalCb Callback to run when callOriginalCb() is called
+	 */
+	explicit NonPostedAsynchronousContinuation(
+		Callback<OriginalCbFnT> originalCb)
+	:	AsynchronousContinuation<OriginalCbFnT>(std::move(originalCb))
+	{}
+
+	/**
+	 * @brief Call the original callback with perfect forwarding
+	 * (immediate execution)
+	 *
+	 * This implementation calls the original callback immediately, on the
+	 * calling thread's stack, without posting to any thread or queue. Used
+	 * for non-posted continuations. If originalCallback.callbackFn tests
+	 * false, nothing is called.
+	 *
+	 * @param args Arguments to forward to the original callback
+	 */
+	template<typename... Args>
+	void callOriginalCb(Args&&... args)
+	{
+		if (AsynchronousContinuation<OriginalCbFnT>::originalCallback
+			.callbackFn)
+		{
+			AsynchronousContinuation<OriginalCbFnT>::originalCallback
+				.callbackFn(std::forward<Args>(args)...);
+		}
+	}
+};
+
+/**
+ * PostedAsynchronousContinuation - For continuations whose original
+ * callback is post()ed to the caller's io_service
+ *
+ * Unlike NonPostedAsynchronousContinuation, callOriginalCb() does not run
+ * the callback on the current stack; it binds the arguments to the callback
+ * and posts the result to the io_service returned by caller->getIoService().
+ */
+template <class OriginalCbFnT>
+class PostedAsynchronousContinuation
+:	public AsynchronousContinuation<OriginalCbFnT>
+{
+public:
+	/**
+	 * @brief Construct from the calling thread and its callback
+	 *
+	 * originalCbFn is received by value and moved into the base class, so
+	 * the Callback is not copied a second time on its way down.
+	 *
+	 * @param caller Thread whose io_service will receive the callback
+	 * @param originalCbFn Callback to post when callOriginalCb() is called
+	 */
+	PostedAsynchronousContinuation(
+		const std::shared_ptr<ComponentThread> &caller,
+		Callback<OriginalCbFnT> originalCbFn)
+	:	AsynchronousContinuation<OriginalCbFnT>(std::move(originalCbFn)),
+	caller(caller)
+	{}
+
+	/**
+	 * @brief Post the original callback, bound to args, to the caller
+	 *
+	 * std::bind stores its own copies of the callback and of each argument,
+	 * so the posted handler does not refer back to this call's stack. If
+	 * originalCallback.callbackFn tests false, nothing is posted.
+	 *
+	 * @param args Arguments to bind to the original callback
+	 */
+	template<typename... Args>
+	void callOriginalCb(Args&&... args)
+	{
+		if (AsynchronousContinuation<OriginalCbFnT>::originalCallback
+			.callbackFn)
+		{
+			caller->getIoService().post(
+				STC(std::bind(
+					AsynchronousContinuation<OriginalCbFnT>::originalCallback
+						.callbackFn,
+					std::forward<Args>(args)...)));
+		}
+	}
+
+public:
+	/// Thread that started the sequence; its io_service receives the callback.
+	std::shared_ptr<ComponentThread> caller;
+};
+
+} // namespace smo
+
+#endif // ASYNCHRONOUS_CONTINUATION_H
@@ -0,0 +1,32 @@
+#ifndef ASYNCHRONOUS_CONTINUATION_CHAIN_LINK_H
+#define ASYNCHRONOUS_CONTINUATION_CHAIN_LINK_H
+
+#include <memory>
+
+namespace smo {
+
+/**
+ * @brief Non-template root of every asynchronous continuation
+ *
+ * Continuations are class templates, so they share no common type of their
+ * own. This polymorphic base supplies one: code that walks a chain of
+ * continuations can hold each link through this type and use RTTI /
+ * dynamic_cast to recover the most derived type when it needs to.
+ *
+ * The class also derives from enable_shared_from_this so that a link can
+ * obtain a shared_ptr to itself, which is useful for gridlock detection
+ * tracking.
+ */
+class AsynchronousContinuationChainLink
+:	public std::enable_shared_from_this<AsynchronousContinuationChainLink>
+{
+public:
+	virtual ~AsynchronousContinuationChainLink() = default;
+
+	/// Returns the continuation belonging to this link's caller, i.e. the
+	/// next link to visit when walking the chain.
+	virtual std::shared_ptr<AsynchronousContinuationChainLink>
+	getCallersContinuationShPtr() const = 0;
+};
+
+} // namespace smo
+
+#endif // ASYNCHRONOUS_CONTINUATION_CHAIN_LINK_H
@@ -0,0 +1,69 @@
+#ifndef ASYNCHRONOUS_LOOP_H
+#define ASYNCHRONOUS_LOOP_H
+
+#include <atomic>
+
+namespace smo {
+
+/**
+ * AsynchronousLoop - Tally for a fixed number of asynchronous iterations
+ *
+ * Holds the total number of iterations expected plus atomic counters for
+ * how many have succeeded and failed so far. The loop is complete once the
+ * two counters add up to the total.
+ */
+class AsynchronousLoop
+{
+public:
+	AsynchronousLoop(
+		const unsigned int nTotal,
+		unsigned int nSucceeded=0, unsigned int nFailed=0)
+	: nTotal(nTotal), nSucceeded(nSucceeded), nFailed(nFailed)
+	{}
+
+	// std::atomic is not copyable, so copies snapshot each counter by hand.
+	AsynchronousLoop(const AsynchronousLoop& rhs)
+	: nTotal(rhs.nTotal),
+	nSucceeded(rhs.nSucceeded.load()), nFailed(rhs.nFailed.load())
+	{}
+
+	AsynchronousLoop& operator=(const AsynchronousLoop& rhs)
+	{
+		if (this == &rhs)
+			{ return *this; }
+
+		nTotal = rhs.nTotal;
+		nSucceeded.store(rhs.nSucceeded.load());
+		nFailed.store(rhs.nFailed.load());
+		return *this;
+	}
+
+	/// True when succeeded + failed equals the expected total.
+	bool isComplete(void) const
+	{
+		const unsigned int nDone = nSucceeded.load() + nFailed.load();
+		return nDone == nTotal;
+	}
+
+	/// Adds one to the success counter or to the failure counter.
+	void incrementSuccessOrFailureDueTo(bool success)
+	{
+		std::atomic<unsigned int> &counter = success ? nSucceeded : nFailed;
+		counter.fetch_add(1);
+	}
+
+	/// Records one outcome, then reports whether the loop is now complete.
+	/// The increment and the completion test are two separate steps.
+	bool incrementSuccessOrFailureAndTestForCompletionDueTo(bool success)
+	{
+		incrementSuccessOrFailureDueTo(success);
+		return isComplete();
+	}
+
+	bool nTotalIsZero(void) const
+	{
+		return 0 == nTotal;
+	}
+
+	/// Overwrites the failure counter so that every iteration which has
+	/// not succeeded is counted as failed.
+	void setRemainingIterationsToFailure()
+	{
+		const unsigned int succeededSoFar = nSucceeded.load();
+		nFailed.store(nTotal - succeededSoFar);
+	}
+
+public:
+	unsigned int nTotal;
+	std::atomic<unsigned int> nSucceeded, nFailed;
+};
+
+} // namespace smo
+
+#endif // ASYNCHRONOUS_LOOP_H
@@ -0,0 +1,138 @@
+#ifndef CALLABLE_TRACER_H
+#define CALLABLE_TRACER_H
+
+#include <config.h>
+#include <string>
+#include <functional>
+#include <iostream>
+#include <cstdint>
+#include <opts.h>
+
+namespace smo {
+
+/**
+ * @brief CallableTracer - Wraps callables with metadata for debugging
+ *
+ * This class wraps any callable object with metadata (caller function name,
+ * line number, and return addresses) to help debug cases where callables
+ * posted to boost::asio::io_service have gone out of scope. The metadata
+ * can be accessed from the callable's address when debugging.
+ */
+class CallableTracer
+{
+public:
+	/**
+	 * @brief Constructor that wraps a callable with metadata
+	 *
+	 * The function name is copied into a std::string member and the
+	 * callable is stored in a std::function<void()>, so the callable must
+	 * be invocable with no arguments.
+	 *
+	 * @param callerFuncName The name of the function that created this callable
+	 * @param callerLine The line number where this callable was created
+	 * @param returnAddr0 The return address of the direct caller
+	 * @param returnAddr1 The return address of the caller before that
+	 * @param callable The callable object to wrap
+	 */
+	template<typename CallableT>
+	explicit CallableTracer(
+		const char* callerFuncName,
+		int callerLine,
+		void* returnAddr0,
+		void* returnAddr1,
+		CallableT&& callable)
+	: callerFuncName(callerFuncName),
+	  callerLine(callerLine),
+	  returnAddr0(returnAddr0),
+	  returnAddr1(returnAddr1),
+	  callable(std::forward<CallableT>(callable))
+	{}
+
+	/**
+	 * @brief Invoke the wrapped callable, optionally logging its origin
+	 *
+	 * When OptionParser::getOptions().traceCallables is set, a line is
+	 * written to std::cout naming the current thread (or a placeholder if
+	 * ComponentThread's TLS is not initialized) and the stored metadata.
+	 * The wrapped callable is then invoked whether or not tracing is on.
+	 *
+	 * NOTE(review): ComponentThread is used here but is not declared by
+	 * any of the standard headers this file includes; presumably it
+	 * arrives via <opts.h> or via the includer's include order -- confirm.
+	 */
+	void operator()()
+	{
+		if (OptionParser::getOptions().traceCallables)
+		{
+			std::cout << "" << __func__ << ": On thread "
+				<< (ComponentThread::tlsInitialized()
+					? ComponentThread::getSelf()->name : "<TLS un-init'ed>")
+					<< ": Calling callable posted by:\n"
+				<< "\t" << callerFuncName << "\n\tat line " << (int)callerLine
+				<< " return addr 0: " << returnAddr0
+				<< ", return addr 1: " << returnAddr1
+				<< std::endl;
+		}
+		callable();
+	}
+
+public:
+	/// Name of the function that created this callable
+	std::string callerFuncName;
+	/// Line number where this callable was created
+	int callerLine;
+	/// Return address of the direct caller
+	void* returnAddr0;
+	/// Return address of the caller before that
+	void* returnAddr1;
+
+private:
+	/// The wrapped callable (type-erased using std::function)
+	std::function<void()> callable;
+};
+
+} // namespace smo
+
+/**
+ * @brief STC - SMO Traceable Callable macro
+ *
+ * When CONFIG_DEBUG_TRACE_CALLABLES is defined, wraps the callable with
+ * CallableTracer to store metadata (caller function name, line number,
+ * and return addresses). When not defined, returns the callable directly
+ * with no overhead.
+ *
+ * The macro is variadic so that a callable whose spelling contains
+ * top-level commas -- most commonly a lambda with several captures, e.g.
+ * STC([this, x]{ ... }) -- is passed through as a single callable instead
+ * of being split into several macro arguments by the preprocessor.
+ *
+ * Uses compiler-specific macros to get fully qualified function names:
+ * - GCC/Clang: __PRETTY_FUNCTION__ (includes full signature with namespace/class)
+ * - MSVC: __FUNCSIG__ (includes full signature)
+ * - Fallback: __func__ (unqualified function name only)
+ *
+ * Uses compiler-specific builtins to get return addresses:
+ * - GCC/Clang: __builtin_return_address(0) and __builtin_return_address(1)
+ * - MSVC: _ReturnAddress() (only one level available)
+ * - Fallback: nullptr for return addresses
+ *
+ * Usage:
+ *   thread->getIoService().post(
+ *       STC(std::bind(&SomeClass::method, this, arg1, arg2)));
+ */
+#ifdef CONFIG_DEBUG_TRACE_CALLABLES
+	#if defined(__GNUC__) || defined(__clang__)
+		// GCC/Clang: __PRETTY_FUNCTION__ gives full signature
+		// e.g., "void smo::SomeClass::method(int, int)"
+		// __builtin_return_address(0) = direct caller
+		// __builtin_return_address(1) = caller before that
+		// NOTE: GCC documents that a nonzero level can have unpredictable
+		// effects (including a crash) and diagnoses it under
+		// -Wframe-address; it is used here for debug builds only.
+		#define STC(...) smo::CallableTracer( \
+			__PRETTY_FUNCTION__, \
+			__LINE__, \
+			__builtin_return_address(0), \
+			__builtin_return_address(1), \
+			__VA_ARGS__)
+	#elif defined(_MSC_VER)
+		// MSVC: __FUNCSIG__ gives full signature
+		// e.g., "void __cdecl smo::SomeClass::method(int, int)"
+		// _ReturnAddress() = direct caller (only one level available)
+		#include <intrin.h>
+		#define STC(...) smo::CallableTracer( \
+			__FUNCSIG__, \
+			__LINE__, \
+			_ReturnAddress(), \
+			nullptr, \
+			__VA_ARGS__)
+	#else
+		// Fallback to standard __func__ (unqualified name only)
+		// No return address support
+		#define STC(...) smo::CallableTracer( \
+			__func__, \
+			__LINE__, \
+			nullptr, \
+			nullptr, \
+			__VA_ARGS__)
+	#endif
+#else
+// Tracing disabled: STC expands to exactly the tokens it was given.
+#define STC(...) __VA_ARGS__
+#endif
+
+#endif // CALLABLE_TRACER_H
@@ -0,0 +1,31 @@
+#ifndef CALLBACK_H
+#define CALLBACK_H
+
+#include <memory>
+
+namespace smo {
+
+// Forward declaration
+class AsynchronousContinuationChainLink;
+
+/**
+ * @brief Callback class that wraps a function and its caller continuation
+ *
+ * This class provides a way to pass both a callback function and the
+ * caller's continuation in a single object, enabling deadlock detection
+ * by walking the chain of continuations.
+ *
+ * The class declares no constructors and both members are public, so it
+ * is initialized as an aggregate. Member order therefore matters: the
+ * first initializer becomes callerContinuation and the second becomes
+ * callbackFn.
+ *
+ * Usage: Callback<CbFnT>{context, std::bind(...)}
+ *
+ * @tparam CbFnT Type of the stored callback function object
+ */
+template<typename CbFnT>
+class Callback
+{
+public:
+	// Aggregate initialization allows: Callback<CbFnT>{context, std::bind(...)}
+	std::shared_ptr<AsynchronousContinuationChainLink> callerContinuation;
+	CbFnT callbackFn;
+};
+
+} // namespace smo
+
+#endif // CALLBACK_H
@@ -9,6 +9,22 @@
 #define CONFIG_MIND_VOSCILLATOR_PERIOD_MS @MIND_VOSCILLATOR_PERIOD_MS@
 #define CONFIG_MIND_VOSCILLATOR_FREQ_MS @MIND_VOSCILLATOR_FREQ_MS@

+/* Device manager reattacher configuration */
+#define CONFIG_MRNTT_DEVMGR_REATTACHER_PERIOD_MS @MRNTT_DEVMGR_REATTACHER_PERIOD_MS@
+/* Stimulus buffer frame period configuration */
+#define CONFIG_STIMBUFF_FRAME_PERIOD_MS @CONFIG_STIMBUFF_FRAME_PERIOD_MS@
+#define CONFIG_STIMBUFF_FRAME_RETRY_DELAY_MS @CONFIG_STIMBUFF_FRAME_RETRY_DELAY_MS@
+
+/* World thread configuration */
+#cmakedefine CONFIG_WORLD_USE_BODY_THREAD
+
+/* Debug locking configuration */
+#cmakedefine CONFIG_ENABLE_DEBUG_LOCKS
+#cmakedefine CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS @DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS@
+
+/* Debug callable tracing configuration */
+#cmakedefine CONFIG_DEBUG_TRACE_CALLABLES
+
 /* Cross-compilation configuration */
 #cmakedefine CMAKE_CROSSCOMPILING

@@ -16,32 +32,20 @@
 #cmakedefine CONFIG_LIB_XCBXORG_ENABLED
 #cmakedefine CONFIG_LIB_ALSA_ENABLED

-/* Sense APIs */
-#cmakedefine CONFIG_SENSEAPI_XCBWINDOW_ENABLED
-#cmakedefine CONFIG_SENSEAPI_V4L_ENABLED
-#cmakedefine CONFIG_SENSEAPI_ALSAMIC_ENABLED
-#cmakedefine CONFIG_SENSEAPI_LIVOX_ENABLED
-#cmakedefine CONFIG_SENSEAPI_R3LIVE_ENABLED
-#cmakedefine CONFIG_SENSEAPI_FASTLIO2_ENABLED
-#cmakedefine CONFIG_SENSEAPI_ADALIO2_ENABLED
-#cmakedefine CONFIG_SENSEAPI_DEEPLIO2_ENABLED
+/* Stim Buff APIs */
+#cmakedefine CONFIG_STIMBUFFAPI_XCBWINDOW_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_LIVOXGEN1_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_V4L_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_ALSAMIC_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_LIVOX_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_R3LIVE_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_FASTLIO2_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_ADALIO2_ENABLED
+#cmakedefine CONFIG_STIMBUFFAPI_DEEPLIO2_ENABLED

 /* Wilzor APIs */
 #cmakedefine CONFIG_WILZORAPI_XCBMOUSE_ENABLED
 #cmakedefine CONFIG_WILZORAPI_XCBKEYBOARD_ENABLED
 #cmakedefine CONFIG_WILZORAPI_ALSAVOICE_ENABLED

-/* Legacy defines for backward compatibility */
-#cmakedefine CONFIG_XCBWINDOW_ENABLED
-#cmakedefine CONFIG_V4L_ENABLED
-#cmakedefine CONFIG_ALSAMIC_ENABLED
-#cmakedefine CONFIG_LIVOX_ENABLED
-#cmakedefine CONFIG_R3LIVE_ENABLED
-#cmakedefine CONFIG_FASTLIO2_ENABLED
-#cmakedefine CONFIG_ADALIO2_ENABLED
-#cmakedefine CONFIG_DEEPLIO2_ENABLED
-#cmakedefine CONFIG_XCBMOUSE_ENABLED
-#cmakedefine CONFIG_XCBKEYBOARD_ENABLED
-#cmakedefine CONFIG_ALSAVOICE_ENABLED
-
 #endif /* _CONFIG_H */
@@ -0,0 +1,85 @@
+#ifndef DEPENDENCY_GRAPH_H
+#define DEPENDENCY_GRAPH_H
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <memory>
+
+namespace smo {
+
+// Forward declarations
+class AsynchronousContinuationChainLink;
+
+/**
+ * @brief DependencyGraph - Directed graph used for lock dependency analysis
+ *
+ * Nodes are continuations (lockvokers). An edge A -> B records that
+ * continuation A wants a lock which continuation B currently holds, so a
+ * cycle in the graph corresponds to a circular wait (a "gridlock").
+ */
+class DependencyGraph
+{
+public:
+	// A node is a shared handle to one link of a continuation chain.
+	using Node = std::shared_ptr<AsynchronousContinuationChainLink>;
+	// Maps every node to the set of nodes it depends on (its out-edges).
+	using AdjacencyList = std::unordered_map<Node, std::unordered_set<Node>>;
+
+	/**
+	 * @brief Add a node to the graph
+	 * @param node The continuation to add
+	 *
+	 * NOTE(review): defined out of line; what happens when the node is
+	 * already present is not visible from this header -- confirm in the
+	 * implementation.
+	 */
+	void addNode(const Node& node);
+
+	/**
+	 * @brief Record that one continuation waits on another
+	 * @param source The continuation that wants a lock
+	 * @param target The continuation that holds the wanted lock
+	 */
+	void addEdge(const Node& source, const Node& target);
+
+	/**
+	 * @brief Enumerate the cycles of the graph using DFS
+	 * @return One inner vector of nodes per cycle found
+	 */
+	std::vector<std::vector<Node>> findCycles() const;
+
+	/**
+	 * @brief Report whether the graph contains any cycle
+	 * @return true if cycles exist, false otherwise
+	 */
+	bool hasCycles() const;
+
+	/**
+	 * @brief Number of nodes currently in the graph
+	 * @return Number of nodes
+	 */
+	size_t getNodeCount() const;
+
+	/**
+	 * @brief Read-only view of the adjacency list (intended for debugging)
+	 * @return Reference to the internal adjacency list
+	 */
+	const AdjacencyList& getAdjacencyList() const { return adjacencyList; }
+
+private:
+	/**
+	 * @brief Recursive step of the DFS-based cycle search
+	 * @param node Node being visited
+	 * @param visited Nodes that have been fully processed
+	 * @param recursionStack Nodes currently on the recursion stack
+	 * @param path Path explored so far
+	 * @param cycles Output: receives every cycle that is found
+	 */
+	void dfsCycleDetection(
+		const Node& node,
+		std::unordered_set<Node>& visited,
+		std::unordered_set<Node>& recursionStack,
+		std::vector<Node>& path,
+		std::vector<std::vector<Node>>& cycles) const;
+
+	// Out-edges of every node known to the graph.
+	AdjacencyList adjacencyList;
+};
+
+} // namespace smo
+
+#endif // DEPENDENCY_GRAPH_H
@@ -0,0 +1,260 @@
+#ifndef LOCK_SET_H
+#define LOCK_SET_H
+
+#include <vector>
+#include <stdexcept>
+#include <utility>
+#include <memory>
+#include <optional>
+#include <qutex.h>
+#include <lockerAndInvokerBase.h>
+
+namespace smo {
+
+// Forward declarations
+template <class OriginalCbFnT>
+class SerializedAsynchronousContinuation;
+class Qutex;
+
+/**
+ * @brief LockSet - Manages a collection of locks for acquisition/release
+ *
+ * Usage order enforced by the checks below: registerInQutexQueues() must have
+ * been called before tryAcquireOrBackOff() or release(); release() and
+ * releaseQutexEarly() additionally require that tryAcquireOrBackOff() has
+ * succeeded. Violations are reported by throwing std::runtime_error.
+ */
+template <class OriginalCbFnT>
+class LockSet
+{
+public:
+	/**	EXPLANATION:
+	 * Tracks both the Qutex that must be acquired, as well as the position
+	 * (iterator) at which the parent LockerAndInvoker was registered in that
+	 * Qutex's queue. hasBeenReleased is set by releaseQutexEarly() so that
+	 * release() does not release the same Qutex a second time.
+	 */
+	struct LockUsageDesc
+	{
+		std::reference_wrapper<Qutex> qutex;
+		typename LockerAndInvokerBase::List::iterator iterator;
+		bool hasBeenReleased = false;
+
+		LockUsageDesc(std::reference_wrapper<Qutex> qutexRef,
+			typename LockerAndInvokerBase::List::iterator iter)
+			: qutex(qutexRef), iterator(iter), hasBeenReleased(false) {}
+	};
+
+	typedef std::vector<std::reference_wrapper<Qutex>> Set;
+
+public:
+	/**
+	 * @brief Constructor
+	 * @param parentContinuation Reference to the parent
+	 * 	SerializedAsynchronousContinuation
+	 * @param qutexes Vector of Qutex references that must be acquired
+	 */
+	LockSet(
+		SerializedAsynchronousContinuation<OriginalCbFnT> &parentContinuation,
+		std::vector<std::reference_wrapper<Qutex>> qutexes = {})
+	: parentContinuation(parentContinuation), allLocksAcquired(false),
+	registeredInQutexQueues(false)
+	{
+		/* Convert Qutex references to LockUsageDesc. The iterators are
+		 * value-initialized placeholders here; they are filled in by
+		 * registerInQutexQueues().
+		 */
+		locks.reserve(qutexes.size());
+		for (auto& qutexRef : qutexes)
+		{
+			locks.emplace_back(
+				qutexRef,
+				typename LockerAndInvokerBase::List::iterator{});
+		}
+	}
+
+	/**
+	 * @brief Register the LockSet with all its Qutex locks
+	 * @param lockvoker The LockerAndInvoker to register with each Qutex
+	 *
+	 *	EXPLANATION:
+	 * Design note: a sequence which can't acquire all of its locks simply
+	 * stays in the qutexQs until it eventually does, so in the normal flow
+	 * the only other time it leaves a qutexQ is at program termination.
+	 *
+	 * It is not certain that all in-flight async sequences will be
+	 * cancelled at shutdown -- especially those that aren't in any
+	 * io_service queue. To whatever extent these objects get cleaned up,
+	 * they'll probably be cleaned up by the qutexQ's std::list destructor,
+	 * which just clear()s the list without any extra cleanup logic.
+	 */
+	void registerInQutexQueues(
+		const std::shared_ptr<LockerAndInvokerBase> &lockvoker
+		)
+	{
+		/**	EXPLANATION:
+		 * Register the lockvoker with each Qutex and store the returned
+		 * iterator to its place within each Qutex's queue. We store the
+		 * iterator so that we can quickly move the lockvoker around within
+		 * the queue, and eventually erase() it.
+		 */
+		for (auto& lockUsageDesc : locks)
+		{
+			lockUsageDesc.iterator = lockUsageDesc.qutex.get().registerInQueue(
+				lockvoker);
+		}
+
+		registeredInQutexQueues = true;
+	}
+
+	/**
+	 * @brief Remove this LockSet's lockvoker from every Qutex queue
+	 * @throws std::runtime_error if registerInQutexQueues() was never called
+	 *
+	 * The registeredInQutexQueues flag is deliberately left set: release()
+	 * checks that flag too, so clearing it here would make release() throw
+	 * if it runs after the lockvoker has been unregistered.
+	 */
+	void unregisterFromQutexQueues()
+	{
+		if (!registeredInQutexQueues)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::unregisterFromQutexQueues() called but not "
+				"registered in Qutex queues");
+		}
+
+		// Unregister from all qutex queues
+		for (auto& lockUsageDesc : locks)
+		{
+			auto it = lockUsageDesc.iterator;
+			lockUsageDesc.qutex.get().unregisterFromQueue(it);
+		}
+	}
+
+
+	/**
+	 * @brief Try to acquire all locks in order; back off if acquisition fails
+	 * @param lockvoker The LockerAndInvoker attempting to acquire the locks
+	 * @param firstFailedQutex Output parameter: set to the first Qutex whose
+	 * 	acquisition failed. Left untouched when every lock is acquired.
+	 * @return true if all locks were acquired, false otherwise
+	 * @throws std::runtime_error if not registered in the Qutex queues, or if
+	 * 	all locks are already held
+	 *
+	 * This parameter used to carry a `= std::nullopt` default. A non-const
+	 * lvalue reference cannot bind to the temporary that would create, so the
+	 * default could never actually be used; callers which don't care about
+	 * the failing Qutex use the one-argument overload below instead.
+	 */
+	bool tryAcquireOrBackOff(
+		LockerAndInvokerBase &lockvoker,
+		std::optional<std::reference_wrapper<Qutex>> &firstFailedQutex
+		)
+	{
+		if (!registeredInQutexQueues)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::tryAcquireOrBackOff() called but not registered in "
+				"Qutex queues");
+		}
+		if (allLocksAcquired)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::tryAcquireOrBackOff() called but allLocksAcquired "
+				"is already true");
+		}
+
+		// Try to acquire all required locks, stopping at the first failure
+		int nAcquired = 0;
+		const int nRequiredLocks = static_cast<int>(locks.size());
+		for (auto& lockUsageDesc : locks)
+		{
+			if (!lockUsageDesc.qutex.get().tryAcquire(
+				lockvoker, nRequiredLocks))
+			{
+				// Report the first failed qutex for debugging
+				firstFailedQutex = std::ref(lockUsageDesc.qutex.get());
+				break;
+			}
+
+			nAcquired++;
+		}
+
+		if (nAcquired < nRequiredLocks)
+		{
+			// Give back the leading locks we did manage to acquire
+			for (int i = 0; i < nAcquired; i++) {
+				locks[i].qutex.get().backoff(lockvoker, nRequiredLocks);
+			}
+
+			return false;
+		}
+
+		allLocksAcquired = true;
+		return true;
+	}
+
+	/**
+	 * @brief Same as the two-argument overload, for callers that do not need
+	 * 	to know which Qutex failed
+	 * @param lockvoker The LockerAndInvoker attempting to acquire the locks
+	 * @return true if all locks were acquired, false otherwise
+	 */
+	bool tryAcquireOrBackOff(LockerAndInvokerBase &lockvoker)
+	{
+		std::optional<std::reference_wrapper<Qutex>> ignoredFirstFailedQutex;
+		return tryAcquireOrBackOff(lockvoker, ignoredFirstFailedQutex);
+	}
+
+	/**
+	 * @brief Release all locks that have not already been released early
+	 * @throws std::runtime_error if not registered in the Qutex queues, or if
+	 * 	the locks are not currently all acquired
+	 */
+	void release()
+	{
+		if (!registeredInQutexQueues)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::release() called but not registered in Qutex "
+				"queues");
+		}
+
+		if (!allLocksAcquired)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::release() called but allLocksAcquired is false");
+		}
+
+		for (auto& lockUsageDesc : locks)
+		{
+			// Already given up through releaseQutexEarly()
+			if (lockUsageDesc.hasBeenReleased) { continue; }
+
+			lockUsageDesc.qutex.get().release();
+		}
+
+		allLocksAcquired = false;
+	}
+
+	/**
+	 * @brief Look up the usage descriptor for a Qutex (matched by address)
+	 * @param criterionLock The Qutex to look for
+	 * @return The descriptor whose qutex is criterionLock
+	 * @throws std::runtime_error if criterionLock is not part of this LockSet
+	 */
+	const LockUsageDesc &getLockUsageDesc(const Qutex &criterionLock) const
+	{
+		for (auto& lockUsageDesc : locks)
+		{
+			if (&lockUsageDesc.qutex.get() == &criterionLock) {
+				return lockUsageDesc;
+			}
+		}
+
+		// Should never happen if the LockSet is properly constructed
+		throw std::runtime_error(
+			std::string(__func__) +
+			": Qutex not found in this LockSet");
+	}
+
+	/**
+	 * @brief Release a specific qutex early and mark it as released
+	 * @param qutex The qutex to release early
+	 * @throws std::runtime_error if the locks are not currently all acquired,
+	 * 	or if qutex is not part of this LockSet
+	 *
+	 * Calling this twice for the same qutex is harmless: the second call
+	 * finds hasBeenReleased already set and does nothing.
+	 */
+	void releaseQutexEarly(Qutex &qutex)
+	{
+		if (!allLocksAcquired)
+		{
+			throw std::runtime_error(
+				std::string(__func__) +
+				": LockSet::releaseQutexEarly() called but allLocksAcquired is false");
+		}
+
+		/* The const_cast is safe: *this is non-const here, and the const
+		 * lookup is only reused to avoid duplicating the search loop.
+		 */
+		auto& lockUsageDesc = const_cast<LockUsageDesc&>(
+			getLockUsageDesc(qutex));
+
+		if (!lockUsageDesc.hasBeenReleased)
+		{
+			lockUsageDesc.qutex.get().release();
+			lockUsageDesc.hasBeenReleased = true;
+		}
+
+		return;
+	}
+
+public:
+	// One descriptor per required Qutex, in acquisition order.
+	std::vector<LockUsageDesc> locks;
+
+private:
+	SerializedAsynchronousContinuation<OriginalCbFnT> &parentContinuation;
+	bool allLocksAcquired, registeredInQutexQueues;
+};
+
+} // namespace smo
+
+#endif // LOCK_SET_H
@@ -0,0 +1,87 @@
+#ifndef LOCKER_AND_INVOKER_BASE_H
+#define LOCKER_AND_INVOKER_BASE_H
+
+#include <list>
+#include <memory>
+
+namespace smo {
+
+// Forward declaration
+class Qutex;
+
+/**
+ * @brief LockerAndInvokerBase - Base class for lockvoking mechanism
+ *
+ * This base class contains the common functionality needed by Qutex: an
+ * opaque pointer identifying the serialized continuation, the comparison
+ * operators built on it, and the virtual interface through which a Qutex can
+ * talk to a lockvoker without knowing its template parameters.
+ */
+class LockerAndInvokerBase
+{
+public:
+	/**
+	 * @brief Constructor
+	 * @param serializedContinuationVaddr Raw pointer to the serialized continuation
+	 */
+	explicit LockerAndInvokerBase(const void* serializedContinuationVaddr)
+	: serializedContinuationVaddr(serializedContinuationVaddr)
+	{}
+
+	/**	EXPLANATION:
+	 * This is a polymorphic base that is handled through base-class
+	 * pointers, so give it a virtual destructor: destroying a derived
+	 * lockvoker through a LockerAndInvokerBase pointer is then always well
+	 * defined. Declaring the destructor would otherwise suppress the
+	 * implicit move operations and deprecate the implicit copies, so all
+	 * four are defaulted explicitly to keep derived lockvokers copyable
+	 * exactly as before.
+	 */
+	virtual ~LockerAndInvokerBase() = default;
+	LockerAndInvokerBase(const LockerAndInvokerBase &) = default;
+	LockerAndInvokerBase &operator=(const LockerAndInvokerBase &) = default;
+	LockerAndInvokerBase(LockerAndInvokerBase &&) = default;
+	LockerAndInvokerBase &operator=(LockerAndInvokerBase &&) = default;
+
+	/**
+	 * @brief Typedef for list of LockerAndInvokerBase shared pointers
+	 */
+	typedef std::list<std::shared_ptr<LockerAndInvokerBase>> List;
+
+	/**
+	 * @brief Get the iterator for this lockvoker in the specified Qutex's queue
+	 * @param qutex The Qutex to get the iterator for
+	 * @return Iterator pointing to this lockvoker in the Qutex's queue
+	 */
+	virtual List::iterator getLockvokerIteratorForQutex(Qutex& qutex) const = 0;
+
+	/**
+	 * @brief Awaken this lockvoker by posting it to its io_service
+	 * @param forceAwaken If true, post even if already awake
+	 */
+	virtual void awaken(bool forceAwaken = false) = 0;
+
+	/* These two are used to iterate through the lockset of a Lockvoker in a
+	 * template-erased manner. We use them in the gridlock detection algorithm.
+	 */
+	virtual size_t getLockSetSize() const = 0;
+	virtual Qutex& getLockAt(size_t index) const = 0;
+
+	/**
+	 * @brief Equality operator
+	 *
+	 * Compare by the address of the continuation objects. Why?
+	 * Because there's no guarantee that the lockvoker object that was
+	 * passed in by the io_service invocation is the same object as that
+	 * which is in the qutexQs. Especially because we make_shared() a
+	 * copy when registerInQutexQueues()ing.
+	 *
+	 * Generally when we "wake" a lockvoker by enqueuing it, boost's
+	 * io_service::post will copy the lockvoker object.
+	 */
+	bool operator==(const LockerAndInvokerBase &other) const
+	{
+		return serializedContinuationVaddr == other.serializedContinuationVaddr;
+	}
+
+	/**
+	 * @brief Inequality operator (negation of operator==)
+	 */
+	bool operator!=(const LockerAndInvokerBase &other) const
+	{
+		return serializedContinuationVaddr != other.serializedContinuationVaddr;
+	}
+
+protected:
+	/* Never let this monstrosity be seen beyond this class's scope.
+	 * Remember what I've taught you, quasi-modo?
+	 *
+	 * It is only ever compared for identity above; it is never dereferenced
+	 * in this class.
+	 */
+	const void* serializedContinuationVaddr;
+};
+
+} // namespace smo
+
+#endif // LOCKER_AND_INVOKER_BASE_H
@@ -0,0 +1,107 @@
+#ifndef QUTEX_H
+#define QUTEX_H
+
+#include <config.h>
+#include <list>
+#include <memory>
+#include <string>
+#include <spinLock.h>
+#include <lockerAndInvokerBase.h>
+
+namespace smo {
+
+/**
+ * @brief Qutex - Queue-based mutex for asynchronous lock management
+ *
+ * Bundles three things: a spinlock guarding the internal state, an ownership
+ * flag, and a queue of lockvokers waiting for the lock. Together they provide
+ * asynchronous lock management with priority-based acquisition for LockSets.
+ */
+class Qutex
+{
+public:
+	/**
+	 * @brief Constructor; the Qutex starts out unowned
+	 * @param _name Human-readable name; only stored when
+	 *	CONFIG_ENABLE_DEBUG_LOCKS is defined
+	 */
+	Qutex([[maybe_unused]] const std::string &_name)
+	:
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+	name(_name), currOwner(nullptr),
+#endif
+	isOwned(false)
+	{}
+
+	/**
+	 * @brief Append a lockvoker to the tail of the wait queue
+	 * @param lockvoker The lockvoker to register
+	 * @return Iterator designating the new queue entry
+	 *
+	 * The spinlock is held for the duration of the insertion.
+	 */
+	LockerAndInvokerBase::List::iterator registerInQueue(
+		const std::shared_ptr<LockerAndInvokerBase> &lockvoker
+		)
+	{
+		lock.acquire();
+		const auto position = queue.insert(queue.end(), lockvoker);
+		lock.release();
+
+		return position;
+	}
+
+	/**
+	 * @brief Erase a lockvoker's entry from the wait queue
+	 * @param it Iterator designating the entry to erase
+	 * @param shouldLock When true (the default) the spinlock is taken around
+	 *	the erase; pass false to erase without touching the spinlock
+	 */
+	void unregisterFromQueue(
+		LockerAndInvokerBase::List::iterator it, bool shouldLock = true
+		)
+	{
+		if (shouldLock) { lock.acquire(); }
+
+		queue.erase(it);
+
+		if (shouldLock) { lock.release(); }
+	}
+
+	/**
+	 * @brief Attempt to take the lock on behalf of a lockvoker
+	 * @param tryingLockvoker The lockvoker attempting to acquire the lock
+	 * @param nRequiredLocks Number of locks required by the lockvoker's LockSet
+	 * @return true if the lock was successfully acquired, false otherwise
+	 */
+	bool tryAcquire(
+		const LockerAndInvokerBase &tryingLockvoker, int nRequiredLocks);
+
+	/**
+	 * @brief Back off after a lockvoker failed to acquire all required locks
+	 * @param failedAcquirer The lockvoker that failed to acquire all locks
+	 * @param nRequiredLocks Number of locks required by the lockvoker's LockSet
+	 */
+	void backoff(const LockerAndInvokerBase &failedAcquirer, int nRequiredLocks);
+
+	/**
+	 * @brief Release the lock and wake up the next waiting lockvoker
+	 */
+	void release();
+
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+	// Debug builds only: returns the stored currOwner pointer (may be null).
+	std::shared_ptr<LockerAndInvokerBase> getCurrOwner() const
+		{ return currOwner; }
+#endif
+
+public:
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+	// Debug builds only: diagnostic name and current owner.
+	std::string name;
+	std::shared_ptr<LockerAndInvokerBase> currOwner;
+#endif
+	// Guards the queue in registerInQueue()/unregisterFromQueue().
+	SpinLock lock;
+	// Lockvokers registered with this Qutex, in registration order.
+	LockerAndInvokerBase::List queue;
+	// Ownership flag; false on construction.
+	bool isOwned;
+};
+
+} // namespace smo
+
+#endif // QUTEX_H
@@ -0,0 +1,164 @@
+#ifndef QUTEX_ACQUISITION_HISTORY_TRACKER_H
+#define QUTEX_ACQUISITION_HISTORY_TRACKER_H
+
+#include <unordered_map>
+#include <memory>
+#include <forward_list>
+#include <functional>
+#include "spinLock.h"
+
+
+namespace smo {
+
+// Forward declarations
+class Qutex;
+class AsynchronousContinuationChainLink;
+class DependencyGraph;
+
+/**
+ * @brief QutexAcquisitionHistoryTracker - Tracks acquisition history for
+ *        gridlock detection
+ *
+ * A process-wide singleton holding one record per lockvoker suspected of
+ * being gridlocked: the lock that the timed-out lockvoker wants, plus the
+ * locks found in its continuation history.
+ */
+class QutexAcquisitionHistoryTracker
+{
+public:
+	/**
+	 * @brief One acquisition history record
+	 *
+	 * pair.first: The firstFailedQutex that this lockvoker WANTS but can't
+	 * acquire
+	 * pair.second: A unique_ptr to a list of all acquired Qutexes in this
+	 * lockvoker's continuation history
+	 */
+	using AcquisitionHistoryEntry = std::pair<
+		std::reference_wrapper<Qutex>,
+		std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>
+	>;
+
+	/**
+	 * @brief The acquisition history itself
+	 *
+	 * Key: std::shared_ptr<AsynchronousContinuationChainLink>
+	 *		(the continuation that contains the timed-out lockvoker)
+	 * Value: AcquisitionHistoryEntry
+	 *		(its wanted lock (aka: firstFailedQutex/pair.first) + held locks)
+	 */
+	using AcquisitionHistoryMap = std::unordered_map<
+		std::shared_ptr<AsynchronousContinuationChainLink>,
+		AcquisitionHistoryEntry
+	>;
+
+public:
+	// Access the single instance (function-local static).
+	static QutexAcquisitionHistoryTracker& getInstance()
+	{
+		static QutexAcquisitionHistoryTracker instance;
+		return instance;
+	}
+
+	/**
+	 * @brief Record a continuation unless it is already recorded
+	 * @param continuation Shared pointer to the
+	 *	AsynchronousContinuationChainLink
+	 * @param wantedLock The lock that this continuation wants but can't
+	 *	acquire
+	 * @param heldLocks Unique pointer to list of locks held in this
+	 *	continuation's history (moved into the record when one is created)
+	 *
+	 * An existing record for the same continuation is left untouched.
+	 */
+	void addIfNotExists(
+		std::shared_ptr<AsynchronousContinuationChainLink> &continuation,
+		Qutex& wantedLock,
+		std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>
+			heldLocks
+		)
+	{
+		acquisitionHistoryLock.acquire();
+
+		// try_emplace() inserts only when the key is absent.
+		acquisitionHistory.try_emplace(
+			continuation, std::ref(wantedLock), std::move(heldLocks));
+
+		acquisitionHistoryLock.release();
+	}
+
+	/**
+	 * @brief Drop a continuation's record
+	 *
+	 * @param continuation Shared pointer to the
+	 *        AsynchronousContinuationChainLink to remove
+	 * @return true if the continuation was found and removed, false if not found
+	 */
+	bool remove(
+		std::shared_ptr<AsynchronousContinuationChainLink> &continuation
+		)
+	{
+		acquisitionHistoryLock.acquire();
+		const auto nErased = acquisitionHistory.erase(continuation);
+		acquisitionHistoryLock.release();
+
+		return nErased > 0;
+	}
+
+	// Gridlock tracing entry points; both are defined out of line.
+	bool heuristicallyTraceContinuationHistoryForGridlockOn(
+		Qutex &firstFailedQutex,
+		std::shared_ptr<AsynchronousContinuationChainLink>&
+			currentContinuation);
+	bool completelyTraceContinuationHistoryForGridlockOn(
+		Qutex &firstFailedQutex);
+
+	/**
+	 * @brief Build a dependency graph among the known continuations from the
+	 * currently recorded acquisition history. A cyclical dependency may
+	 * exist which hasn't been reported to the history tracker yet.
+	 * @param dontAcquireLock If true, skips acquiring the internal spinlock
+	 * (assumes caller already holds it)
+	 */
+	[[nodiscard]] std::unique_ptr<DependencyGraph> generateGraph(
+		bool dontAcquireLock = false);
+
+	// Singleton: copying is disabled.
+	QutexAcquisitionHistoryTracker(
+		const QutexAcquisitionHistoryTracker&) = delete;
+	QutexAcquisitionHistoryTracker& operator=(
+		const QutexAcquisitionHistoryTracker&) = delete;
+
+private:
+	// Only getInstance() may create or destroy the tracker.
+	QutexAcquisitionHistoryTracker() = default;
+	~QutexAcquisitionHistoryTracker() = default;
+
+private:
+	/**	EXPLANATION:
+	 * A SpinLock is used here rather than a Qutex because this tracker is
+	 * invoked from within the LockerAndInvoker, and LockerAndInvoker is
+	 * tightly coupled with the workings of Qutex. Guarding the history with
+	 * a Qutex would create a circular dependency or deadlock situation, so
+	 * a plain SpinLock avoids those coupling issues.
+	 */
+	SpinLock acquisitionHistoryLock;
+	AcquisitionHistoryMap acquisitionHistory;
+};
+
+} // namespace smo
+
+#endif // QUTEX_ACQUISITION_HISTORY_TRACKER_H
@@ -0,0 +1,588 @@
+#ifndef SERIALIZED_ASYNCHRONOUS_CONTINUATION_H
+#define SERIALIZED_ASYNCHRONOUS_CONTINUATION_H
+
+#include <config.h>
+#include <memory>
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <optional>
+#include <componentThread.h>
+#include <lockSet.h>
+#include <asynchronousContinuation.h>
+#include <lockerAndInvokerBase.h>
+#include <callback.h>
+#include <qutexAcquisitionHistoryTracker.h>
+
+namespace smo {
+
+/**
+ * @brief SerializedAsynchronousContinuation - A posted continuation that
+ *	carries a LockSet of Qutexes (requiredLocks).
+ *
+ * The nested LockerAndInvoker is the callable that is posted to a
+ * ComponentThread's io_service on behalf of this continuation.
+ */
+template <class OriginalCbFnT>
+class SerializedAsynchronousContinuation
+:	public PostedAsynchronousContinuation<OriginalCbFnT>
+{
+public:
+	/**
+	 * @brief Constructor
+	 * @param caller Forwarded to the PostedAsynchronousContinuation base
+	 * @param originalCbFn Forwarded to the PostedAsynchronousContinuation base
+	 * @param requiredLocks Qutexes placed into this continuation's LockSet.
+	 *	Note that the parameter shadows the member of the same name; inside
+	 *	the initializer's parentheses the name refers to the parameter.
+	 */
+	SerializedAsynchronousContinuation(
+		const std::shared_ptr<ComponentThread> &caller,
+		Callback<OriginalCbFnT> originalCbFn,
+		std::vector<std::reference_wrapper<Qutex>> requiredLocks)
+	:	PostedAsynchronousContinuation<OriginalCbFnT>(caller, originalCbFn),
+		requiredLocks(*this, std::move(requiredLocks))
+	{}
+
+	/**
+	 * @brief Release this continuation's locks, then invoke the base class's
+	 *	callOriginalCb() with the forwarded arguments
+	 *
+	 * The locks are released BEFORE the callback is invoked.
+	 * LockSet::release() throws std::runtime_error when the LockSet is not
+	 * registered or its locks are not all acquired; that propagates from here.
+	 */
+	template<typename... Args>
+	void callOriginalCb(Args&&... args)
+	{
+		requiredLocks.release();
+		PostedAsynchronousContinuation<OriginalCbFnT>::callOriginalCb(
+			std::forward<Args>(args)...);
+	}
+
+	// Return list of all qutexes in predecessors' LockSets; excludes self.
+	// NOTE(review): declared unconditionally, but the definition visible later
+	// in this header sits after an #ifdef CONFIG_ENABLE_DEBUG_LOCKS -- confirm
+	// that it is never called from non-debug builds.
+	[[nodiscard]]
+	std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>
+	getAcquiredQutexHistory() const;
+
+	/**
+	 * @brief Release a specific qutex early
+	 * @param qutex The qutex to release early
+	 *
+	 * Thin forwarder to LockSet::releaseQutexEarly().
+	 */
+	void releaseQutexEarly(Qutex &qutex)
+		{ requiredLocks.releaseQutexEarly(qutex); }
+
+public:
+	// The Qutexes this continuation needs.
+	LockSet<OriginalCbFnT> requiredLocks;
+	// Set by awaken()/firstWake() and cleared by allowAwakening(); awaken()
+	// uses it to avoid posting a second copy of the lockvoker.
+	std::atomic<bool> isAwakeOrBeingAwakened{false};
+
+	/**
+	 * @brief LockerAndInvoker - Template class for lockvoking mechanism
+	 *
+	 * This class wraps a std::bind result and provides locking functionality.
+	 * When locks cannot be acquired, the object re-posts itself to the io_service
+	 * queue, implementing the "spinqueueing" pattern.
+	 */
+	template <class InvocationTargetT>
+	class LockerAndInvoker
+	:	public LockerAndInvokerBase
+	{
+	public:
+		/**
+		 * @brief Constructor that immediately posts to io_service
+		 * @param serializedContinuation Reference to the serialized continuation
+		 *	containing LockSet and target io_service
+		 * @param target The ComponentThread whose io_service to post to
+		 * @param invocationTarget The std::bind result to invoke when locks are acquired
+		 * @throws std::runtime_error In CONFIG_ENABLE_DEBUG_LOCKS builds only:
+		 *	when traceContinuationHistoryForDeadlock() reports a Qutex. The
+		 *	throw happens before firstWake(), so nothing has been registered
+		 *	or posted at that point.
+		 */
+		LockerAndInvoker(
+			SerializedAsynchronousContinuation<OriginalCbFnT>
+				&serializedContinuation,
+			const std::shared_ptr<ComponentThread>& target,
+			InvocationTargetT invocationTarget)
+		:	LockerAndInvokerBase(&serializedContinuation),
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+		creationTimestamp(std::chrono::steady_clock::now()),
+#endif
+		serializedContinuation(serializedContinuation),
+		target(target),
+		invocationTarget(std::move(invocationTarget))
+		{
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+			// Check the required locks against the continuation history
+			// before anything is registered or posted.
+			std::optional<std::reference_wrapper<Qutex>> firstDuplicatedQutex =
+				traceContinuationHistoryForDeadlock();
+
+			if (firstDuplicatedQutex.has_value())
+			{
+				handleDeadlock(firstDuplicatedQutex.value().get());
+				throw std::runtime_error(
+					"LockerAndInvoker::LockerAndInvoker(): Deadlock detected");
+			}
+#endif // CONFIG_ENABLE_DEBUG_LOCKS
+
+			firstWake();
+		}
+
+		/**
+		 * @brief Function call operator - tries to acquire locks and either
+		 * 	invokes the target or returns (already registered in qutex queues)
+		 *
+		 * Defined out of line.
+		 */
+		void operator()();
+
+		/**
+		 * @brief Get the iterator for this lockvoker in the specified Qutex's queue
+		 * @param qutex The Qutex to get the iterator for
+		 * @return Iterator pointing to this lockvoker in the Qutex's queue
+		 *
+		 * LockSet::getLockUsageDesc() throws std::runtime_error if qutex is
+		 * not part of this continuation's LockSet.
+		 */
+		LockerAndInvokerBase::List::iterator
+		getLockvokerIteratorForQutex(Qutex& qutex) const override
+		{
+			return serializedContinuation.requiredLocks.getLockUsageDesc(
+				qutex).iterator;
+		}
+
+		/**
+		 * @brief Awaken this lockvoker by posting it to its io_service
+		 * @param forceAwaken If true, post even if already awake
+		 */
+		void awaken(bool forceAwaken = false) override
+		{
+			// Atomically set the flag and learn whether it was already set.
+			bool prevVal = serializedContinuation.isAwakeOrBeingAwakened
+				.exchange(true);
+
+			// Already awake (or being awakened) and not forced: posting again
+			// would put a duplicate on the io_service.
+			if (prevVal == true && !forceAwaken)
+				{ return; }
+
+			// Passes *this to post().
+			target->getIoService().post(*this);
+		}
+
+		// Number of Qutexes in the continuation's LockSet.
+		size_t getLockSetSize() const override
+			{ return serializedContinuation.requiredLocks.locks.size(); }
+
+		// Qutex at position `index` of the LockSet; uses unchecked
+		// operator[], so index must be < getLockSetSize().
+		Qutex& getLockAt(size_t index) const override
+		{
+			return serializedContinuation.requiredLocks.locks[index]
+				.qutex.get();
+		}
+
+	private:
+		// Allow awakening by resetting the awake flag
+		void allowAwakening()
+			{ serializedContinuation.isAwakeOrBeingAwakened.store(false); }
+
+		/**	EXPLANATION:
+		 * We create a copy of the Lockvoker and then give sh_ptrs to that
+		 * *COPY*, to each Qutex's internal queue. This enables us to keep
+		 * the AsyncContinuation sh_ptr (which the Lockvoker contains within
+		 * itself) alive without wasting too much memory.
+		 *
+		 * This way the io_service objects can remove the lockvoker from
+		 * their queues and there'll be a copy of the lockvoker in each
+		 * Qutex's queue.
+		 *
+		 * For non-serialized, posted continuations, they won't be removed
+		 * from the io_service queue until they're executed, so there's no
+		 * need to create copies of them. Lockvokers are removed from their
+		 * io_service, potentially without being executed if they fail to
+		 * acquire all locks.
+		 */
+		void registerInLockSet()
+		{
+			// A single heap copy is shared by every Qutex queue.
+			auto sharedLockvoker = std::make_shared<
+				LockerAndInvoker<InvocationTargetT>>(*this);
+
+			serializedContinuation.requiredLocks.registerInQutexQueues(
+				sharedLockvoker);
+		}
+
+		/**
+		 * @brief First wake - register in queues and awaken
+		 *
+		 * Sets isAwakeOrBeingAwakened=true before registering, so that none
+		 * of the Qutexes we register with can awaken() a duplicate copy of
+		 * this lockvoker onto the io_service. Because the flag is already
+		 * set by the time we call awaken() ourselves, that call must pass
+		 * forceAwaken=true or it would return without posting.
+		 */
+		void firstWake()
+		{
+			serializedContinuation.isAwakeOrBeingAwakened.store(true);
+			registerInLockSet();
+			// Force awaken since we just set the flag above
+			awaken(true);
+		}
+
+		// Has CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS elapsed since creation?
+		// Always false when CONFIG_ENABLE_DEBUG_LOCKS is not defined.
+		bool isDeadlockLikely() const
+		{
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+			auto now = std::chrono::steady_clock::now();
+			auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
+				now - creationTimestamp);
+			return elapsed.count() >= CONFIG_DEBUG_QUTEX_DEADLOCK_TIMEOUT_MS;
+#else
+			return false;
+#endif
+		}
+
+		// Wrapper around isDeadlockLikely for gridlock detection
+		bool isGridlockLikely() const
+			{ return isDeadlockLikely(); }
+
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+		// Holder for a superseded gridlock tracer; the member function is
+		// defined out of line.
+		struct obsolete {
+			bool traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex);
+		};
+
+		// Defined out of line.
+		bool traceContinuationHistoryForDeadlockOn(Qutex &firstFailedQutex);
+
+		/**
+		 * @brief Run traceContinuationHistoryForDeadlockOn() on each Qutex
+		 *	of this continuation's LockSet, in order
+		 * @return The first Qutex for which the trace returned true, or
+		 *	std::nullopt if it returned false for all of them
+		 */
+		std::optional<std::reference_wrapper<Qutex>>
+		traceContinuationHistoryForDeadlock(void)
+		{
+			for (auto& lockUsageDesc
+				: serializedContinuation.requiredLocks.locks)
+			{
+				if (traceContinuationHistoryForDeadlockOn(
+					lockUsageDesc.qutex.get()))
+				{
+					return std::ref(lockUsageDesc.qutex.get());
+				}
+			}
+			return std::nullopt;
+		}
+
+		/**
+		 * @brief Handle a likely deadlock situation by logging debug information
+		 * @param firstFailedQutex The first qutex that failed acquisition
+		 *
+		 * Only writes a message to std::cerr (time since creation plus the
+		 * Qutex's address and name); it takes no corrective action.
+		 */
+		void handleDeadlock(const Qutex &firstFailedQutex)
+		{
+			std::cerr << __func__ << ": Deadlock: "
+				<< "Lockvoker has been waiting for "
+				<< std::chrono::duration_cast<std::chrono::milliseconds>(
+					std::chrono::steady_clock::now() - this->creationTimestamp)
+					.count()
+				<< "ms, failed on qutex @" << &firstFailedQutex
+				<< " (" << firstFailedQutex.name << ")" << std::endl;
+		}
+
+		/**
+		 * @brief Gridlock counterpart of handleDeadlock()
+		 * @param firstFailedQutex The first qutex that failed acquisition
+		 *
+		 * Identical to handleDeadlock() apart from the "Gridlock" label in
+		 * the message.
+		 */
+		void handleGridlock(const Qutex &firstFailedQutex)
+		{
+			std::cerr << __func__ << ": Gridlock: "
+				<< "Lockvoker has been waiting for "
+				<< std::chrono::duration_cast<std::chrono::milliseconds>(
+					std::chrono::steady_clock::now() - this->creationTimestamp)
+					.count()
+				<< "ms, failed on qutex @" << &firstFailedQutex
+				<< " (" << firstFailedQutex.name << ")" << std::endl;
+		}
+#endif
+
+	private:
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+		// Taken in the constructor; basis for isDeadlockLikely().
+		std::chrono::steady_clock::time_point creationTimestamp;
+#endif
+		// Held by reference: every copy of a lockvoker refers to the same
+		// continuation object.
+		SerializedAsynchronousContinuation<OriginalCbFnT>
+			&serializedContinuation;
+		// Thread whose io_service awaken() posts to.
+		std::shared_ptr<ComponentThread> target;
+		// The callable supplied at construction.
+		InvocationTargetT invocationTarget;
+	};
+};
+
+/******************************************************************************/
+
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+
+/**
+ * Collect the Qutexes found in the LockSets of this continuation's
+ * predecessors (its caller, the caller's caller, and so on).
+ *
+ * This continuation's own LockSet is excluded on purpose: it is the one that
+ * is failing to acquire its locks and backing off, so the walk begins at the
+ * caller's continuation.
+ *
+ * NOTE(review): only predecessors that are
+ * SerializedAsynchronousContinuation<OriginalCbFnT> with the SAME
+ * OriginalCbFnT pass the dynamic cast; any other link contributes nothing.
+ */
+template <class OriginalCbFnT>
+std::unique_ptr<std::forward_list<std::reference_wrapper<Qutex>>>
+SerializedAsynchronousContinuation<OriginalCbFnT>::getAcquiredQutexHistory()
+const
+{
+	using SameTypeContinuation =
+		SerializedAsynchronousContinuation<OriginalCbFnT>;
+
+	auto qutexHistory = std::make_unique<
+		std::forward_list<std::reference_wrapper<Qutex>>>();
+
+	std::shared_ptr<AsynchronousContinuationChainLink> ancestor =
+		this->getCallersContinuationShPtr();
+
+	while (ancestor != nullptr)
+	{
+		auto serializedAncestor =
+			std::dynamic_pointer_cast<SameTypeContinuation>(ancestor);
+
+		if (serializedAncestor != nullptr)
+		{
+			// Prepend every lock of this predecessor's LockSet
+			for (const auto &lockUsageDesc
+				: serializedAncestor->requiredLocks.locks)
+			{
+				qutexHistory->push_front(lockUsageDesc.qutex);
+			}
+		}
+
+		ancestor = ancestor->getCallersContinuationShPtr();
+	}
+
+	return qutexHistory;
+}
+
+/**
+ * Check whether any predecessor continuation still holds firstFailedQutex.
+ *
+ * @param firstFailedQutex The Qutex this lockvoker is about to wait for
+ * @return true if a predecessor's LockSet contains firstFailedQutex and has
+ *	not released it early (a deadlock); false otherwise
+ */
+template <class OriginalCbFnT>
+template <class InvocationTargetT>
+bool
+SerializedAsynchronousContinuation<OriginalCbFnT>
+::LockerAndInvoker<InvocationTargetT>
+::traceContinuationHistoryForDeadlockOn(Qutex& firstFailedQutex)
+{
+	/**	EXPLANATION:
+	 * In this function we will trace through the chain of continuations that
+	 * led up to this Lockvoker's continuation. For each continuation which is
+	 * a SerializedAsynchronousContinuation, we check through its LockSet to see
+	 * if it contains the lock that failed acquisition. If it does, and that
+	 * continuation hasn't already given the lock up via releaseQutexEarly(),
+	 * we have a deadlock.
+	 *
+	 * The LockSet is scanned directly instead of calling getLockUsageDesc().
+	 * That function throws when the Qutex is absent, which is the NORMAL
+	 * case here; catching and printing that exception emitted a spurious
+	 * error on std::cerr for every predecessor which doesn't use the lock.
+	 *
+	 * NOTE(review): only predecessors instantiated with the SAME
+	 * OriginalCbFnT pass the dynamic cast below; any other link in the chain
+	 * is skipped.
+	 */
+
+	/* We can't start with the continuation directly referenced by this
+	 * Lockvoker, as it contains all the locks we're currently trying to
+	 * acquire...and rightly so because it's the continuation for this current
+	 * lockvoker.
+	 */
+	for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
+			this->serializedContinuation.getCallersContinuationShPtr();
+		currContin != nullptr;
+		currContin = currContin->getCallersContinuationShPtr())
+	{
+		auto serializedCont = std::dynamic_pointer_cast<
+			SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);
+
+		if (serializedCont == nullptr) { continue; }
+
+		// Is firstFailedQutex in this continuation's LockSet and still held?
+		bool stillHeldByPredecessor = false;
+		for (const auto &lockUsageDesc : serializedCont->requiredLocks.locks)
+		{
+			if (&lockUsageDesc.qutex.get() != &firstFailedQutex)
+				{ continue; }
+
+			/* A lock released early is no longer held by the predecessor,
+			 * so waiting for it again is not a self-deadlock.
+			 */
+			if (lockUsageDesc.hasBeenReleased) { continue; }
+
+			stillHeldByPredecessor = true;
+			break;
+		}
+
+		if (!stillHeldByPredecessor) { continue; }
+
+		std::cout << __func__ << ":Deadlock detected: Found "
+			<< "firstFailedQutex @" << &firstFailedQutex
+			<< " (" << firstFailedQutex.name << ") in LockSet of "
+			<< "SerializedAsynchronousContinuation @"
+			<< serializedCont.get() << std::endl;
+
+		return true;
+	}
+
+	return false;
+}
+
+template <class OriginalCbFnT>
+template <class InvocationTargetT>
+bool
+SerializedAsynchronousContinuation<OriginalCbFnT>
+::LockerAndInvoker<InvocationTargetT>
+::obsolete::traceContinuationHistoryForGridlockOn(Qutex &firstFailedQutex)
+{
+	/**	EXPLANATION:
+	 * In this function we check for gridlocks which are slightly different
+	 * from deadlocks. In a gridlock, two requests are waiting for locks that
+	 * are held by the other. I.e:
+	 *
+	 * R1 holds LockA and is waiting for LockB.
+	 * R2 holds LockB and is waiting for LockA.
+	 *
+	 * This differs from deadlocks because it's not a single request which is
+	 * attempting to re-acquire a lock that it already holds.
+	 *
+	 * To detect this condition, we wait until the acquisition timeout has
+	 * expired. Then: we extract the current owner of the first lock we're
+	 * failing to acquire.
+	 *
+	 * From there, we go through each of the locks in the foreign owner's
+	 * current (i.e: immediate, most recent continuation's) required LockSet.
+	 * For each of the locks in the foreign owner's most immediate required
+	 * LockSet, we trace backward in our *OWN* history to see if any of *OUR*
+	 * continuations (excluding our most immediate continuation) contains that
+	 * lock.
+	 *
+	 * If we find a match, that means that we're holding a lock that the foreign
+	 * owner is waiting for. And we already know that the foreign owner is
+	 * holding a lock that we're waiting for (when we extracted the current
+	 * owner of the first failed lock in our most immediate Lockset).
+	 *
+	 * Hence, we have a gridlock.
+	 *
+	 * Returns true if such a match is found; false if there is no current
+	 * owner or no match.
+	 */
+
+	std::shared_ptr<LockerAndInvokerBase> foreignOwnerShPtr =
+		firstFailedQutex.getCurrOwner();
+	// If no current owner, can't be a gridlock
+	if (foreignOwnerShPtr == nullptr)
+		{ return false; }
+
+	// Use reference for the rest of the function for safety.
+	LockerAndInvokerBase &foreignOwner = *foreignOwnerShPtr;
+
+	/* For each lock in the foreign owner's LockSet, check if we hold it
+	 * in any of our previous continuations (excluding our most immediate one)
+	 */
+	for (size_t i = 0; i < foreignOwner.getLockSetSize(); ++i)
+	{
+		Qutex& foreignLock = foreignOwner.getLockAt(i);
+
+		/* Skip the firstFailedQutex since we already know the foreign owner
+		 * holds it -- hence it's impossible for any of our previous
+		 * continuations to hold it.
+		 */
+		if (&foreignLock == &firstFailedQutex)
+			{ continue; }
+
+		/**	EXPLANATION:
+		 * Trace backward through our continuation history (excluding our most
+		 * immediate continuation).
+		 *
+		 * The reason we exclude our most immediate continuation is because the
+		 * LockSet acquisition algorithm backs off if it fails to acquire ALL
+		 * locks in the set. So if the lock that the foreign owner is waiting
+		 * for is in our most immediate continuation, and NOT in one of our
+		 * previous continuations, then we will back off and the foreign owner
+		 * should eventually be able to acquire that lock.
+		 */
+		for (std::shared_ptr<AsynchronousContinuationChainLink> currContin =
+				this->serializedContinuation.getCallersContinuationShPtr();
+			 currContin != nullptr;
+			 currContin = currContin->getCallersContinuationShPtr())
+		{
+			auto serializedCont = std::dynamic_pointer_cast<
+				SerializedAsynchronousContinuation<OriginalCbFnT>>(currContin);
+
+			// Chain links that aren't serialized continuations carry no LockSet.
+			if (serializedCont == nullptr) { continue; }
+
+			// Check if this continuation holds the foreign lock
+			try {
+				/* Only the throw/no-throw outcome of the lookup matters here;
+				 * the returned descriptor itself is not needed.
+				 */
+				(void)serializedCont->requiredLocks
+					.getLockUsageDesc(foreignLock);
+
+				// Matched! We hold a lock that the foreign owner is waiting for
+				std::cout << __func__ << ": Gridlock detected: We hold lock @"
+					<< &foreignLock << " (" << foreignLock.name << ") in "
+					"continuation @" << serializedCont.get()
+					<< ", while foreign owner @" << &foreignOwner
+					<< " holds lock @" << &firstFailedQutex << " ("
+					<< firstFailedQutex.name << ") that we're waiting for"
+					<< std::endl;
+
+				return true;
+			} catch (const std::runtime_error&) {
+				// This continuation doesn't hold the foreign lock. Continue.
+				continue;
+			}
+		}
+	}
+
+	return false;
+}
+
+#endif // CONFIG_ENABLE_DEBUG_LOCKS
+
+/**
+ * Runs one lock-then-invoke attempt for this lockvoker.
+ *
+ * Tries to acquire every lock in serializedContinuation.requiredLocks. On
+ * failure it calls allowAwakening() and returns without invoking the target
+ * (after optional deadlock/gridlock diagnostics when CONFIG_ENABLE_DEBUG_LOCKS
+ * is defined). On success it unregisters from the qutex queues and calls
+ * invocationTarget().
+ *
+ * @throws std::runtime_error if ComponentThread::getSelf() != target.
+ */
+template <class OriginalCbFnT>
+template <class InvocationTargetT>
+void SerializedAsynchronousContinuation<OriginalCbFnT>
+::LockerAndInvoker<InvocationTargetT>::operator()()
+{
+	if (ComponentThread::getSelf() != target)
+	{
+		throw std::runtime_error(
+			"LockerAndInvoker::operator(): Thread safety violation - "
+			"executing on wrong ComponentThread");
+	}
+
+	std::optional<std::reference_wrapper<Qutex>> firstFailedQutexRet;
+	// Both likelihood flags are sampled before the acquisition attempt below.
+	bool deadlockLikely = isDeadlockLikely();
+	bool gridlockLikely = isGridlockLikely();
+
+	if (!serializedContinuation.requiredLocks.tryAcquireOrBackOff(
+		*this, firstFailedQutexRet))
+	{
+		// Just allow this lockvoker to be dropped from its io_service.
+		allowAwakening();
+		if (!deadlockLikely && !gridlockLikely)
+			{ return; }
+
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+		/* NOTE(review): value() throws std::bad_optional_access if
+		 * tryAcquireOrBackOff() left firstFailedQutexRet empty -- presumably
+		 * it always sets it when it returns false; confirm.
+		 */
+		Qutex	&firstFailedQutex = firstFailedQutexRet.value().get();
+		bool isDeadlock = traceContinuationHistoryForDeadlockOn(
+			firstFailedQutex);
+
+		bool gridlockIsHeuristicallyLikely = false;
+		bool gridlockIsAlgorithmicallyLikely = false;
+
+		if (gridlockLikely)
+		{
+			auto& tracker = QutexAcquisitionHistoryTracker
+				::getInstance();
+
+			auto heldLocks = serializedContinuation
+				.getAcquiredQutexHistory();
+
+			// Add this continuation to the tracker
+			auto currentContinuationShPtr = serializedContinuation
+				.shared_from_this();
+
+			tracker.addIfNotExists(
+				currentContinuationShPtr,
+				firstFailedQutex, std::move(heldLocks));
+
+			gridlockIsHeuristicallyLikely = tracker
+				.heuristicallyTraceContinuationHistoryForGridlockOn(
+					firstFailedQutex, currentContinuationShPtr);
+
+			// The complete trace is only attempted after a heuristic hit.
+			if (gridlockIsHeuristicallyLikely)
+			{
+				gridlockIsAlgorithmicallyLikely = tracker
+					.completelyTraceContinuationHistoryForGridlockOn(
+						firstFailedQutex);
+			}
+		}
+
+		/* NOTE(review): the complete trace only runs when the heuristic trace
+		 * already returned true, so with `||` its result cannot change
+		 * isGridlock. Confirm whether the complete trace was meant to
+		 * confirm (i.e. veto) the heuristic result.
+		 */
+		bool isGridlock = (gridlockIsHeuristicallyLikely
+			|| gridlockIsAlgorithmicallyLikely);
+
+		if (!isDeadlock && !isGridlock)
+			{ return; }
+
+		if (isDeadlock) { handleDeadlock(firstFailedQutex); }
+		if (isGridlock) { handleGridlock(firstFailedQutex); }
+#endif
+		return;
+	}
+
+	/**	EXPLANATION:
+	 * Successfully acquired all locks, so unregister from qutex queues.
+	 * We do this here so that we can free up queue slots in the qutex
+	 * queues for other lockvokers that may be waiting to acquire the
+	 * locks. The size of the qutex queues does matter for other
+	 * contending lockvokers; and so also does their position in the
+	 * queues.
+	 *
+	 * The alternative is to leave ourself in the queues until we
+	 * eventually release all locks; and given that we may hold locks
+	 * even across true async hardware bottlenecks, this could take a
+	 * long time.
+	 *
+	 * Granted, the fact that we own the locks means that even though
+	 * we've removed ourselves from the queues, other lockvokers still
+	 * can't acquire the locks anyway.
+	 */
+	serializedContinuation.requiredLocks.unregisterFromQutexQueues();
+
+#ifdef CONFIG_ENABLE_DEBUG_LOCKS
+	/**	EXPLANATION:
+	 * If we were being tracked for gridlock detection but successfully
+	 * acquired all locks, it was a false positive due to timed delay,
+	 * long-running operation, or I/O delay
+	 */
+	if (gridlockLikely)
+	{
+		std::shared_ptr<AsynchronousContinuationChainLink>
+			currentContinuationShPtr =
+				serializedContinuation.shared_from_this();
+
+		// remove() reports whether this continuation was actually tracked.
+		bool removed = QutexAcquisitionHistoryTracker::getInstance()
+			.remove(currentContinuationShPtr);
+
+		if (removed)
+		{
+			std::cerr
+				<< "LockerAndInvoker::operator(): False positive "
+				"gridlock detection - continuation @"
+				<< &serializedContinuation
+				<< " was being tracked but successfully acquired all "
+				"locks. This was likely due to timed delay, "
+				"long-running operation, or I/O delay."
+				<< std::endl;
+		}
+	}
+#endif
+
+	// All locks are held at this point: run the wrapped target.
+	invocationTarget();
+}
+
+} // namespace smo
+
+#endif // SERIALIZED_ASYNCHRONOUS_CONTINUATION_H
@@ -0,0 +1,121 @@
+#ifndef SPIN_LOCK_H
+#define SPIN_LOCK_H
+
+#include <atomic>
+#ifdef __x86_64__
+#include <immintrin.h>
+#elif defined(__i386__)
+#include <xmmintrin.h>
+#elif defined(__arm__)
+#include <arm_neon.h>
+#elif defined(__aarch64__)
+#include <arm_neon.h>
+#elif defined(__aarch32__)
+#include <arm_neon.h>
+#endif
+
+namespace smo {
+
+/**
+ * @brief Simple spinlock using std::atomic
+ */
+class SpinLock
+{
+public:
+	/// Constructs the lock in the released state.
+	SpinLock()
+	: locked(false)
+	{}
+
+	/**
+	 * @brief Make one non-blocking attempt to take the lock.
+	 * @return true if this call took the lock, false if it was already held.
+	 */
+	bool tryAcquire()
+	{
+		bool wasLocked = false;
+		const bool swapped = locked.compare_exchange_strong(wasLocked, true);
+		return swapped;
+	}
+
+	/// Executes the architecture's spin-wait hint instruction.
+	inline void spinPause()
+	{
+#if defined(__x86_64__) || defined(__i386__)
+		_mm_pause();
+#elif defined(__arm__) || defined(__aarch64__) || defined(__aarch32__)
+		__asm__ volatile("yield");
+#else
+# error "Unsupported architecture"
+#endif
+	}
+
+	/// Spins until the lock has been taken by the caller.
+	void acquire()
+	{
+		/**	EXPLANATION:
+		 * Busy-wait: retry until an attempt succeeds, issuing the
+		 * architecture-specific spinPause() hint between attempts.
+		 *
+		 * The hint is deliberately mandatory: the original author reports
+		 * having fried an older Intel M-class laptop CPU with a pause-less
+		 * for (;;){} loop, and doesn't want to repeat that on an RPi or the
+		 * other testbed robot boards.
+		 */
+		for (;;)
+		{
+			if (tryAcquire()) { return; }
+			spinPause();
+		}
+	}
+
+	/// Marks the lock as released.
+	void release()
+	{
+		locked.store(false);
+	}
+
+	/**
+	 * @brief Scoped guard for SpinLock
+	 * Acquires the spinlock in its constructor and releases it in its
+	 * destructor, unless unlockPrematurely() already released it.
+	 */
+	class Guard
+	{
+	public:
+		explicit Guard(SpinLock& lock)
+		: lock_(lock), unlocked_(false)
+		{
+			lock_.acquire();
+		}
+
+		~Guard()
+		{
+			unlockPrematurely();
+		}
+
+		/// Releases the lock now; later calls and the destructor do nothing.
+		void unlockPrematurely()
+		{
+			if (unlocked_) { return; }
+
+			lock_.release();
+			unlocked_ = true;
+		}
+
+		// Neither copying nor moving is permitted.
+		Guard(const Guard&) = delete;
+		Guard& operator=(const Guard&) = delete;
+		Guard(Guard&&) = delete;
+		Guard& operator=(Guard&&) = delete;
+
+	private:
+		SpinLock& lock_;
+		bool unlocked_;
+	};
+
+private:
+	std::atomic<bool> locked;
+};
+
+} // namespace smo
+
+#endif // SPIN_LOCK_H
@@ -0,0 +1,63 @@
+#ifndef _COMBINATORIAL_LOGIC_EXPRESSION_H
+#define _COMBINATORIAL_LOGIC_EXPRESSION_H
+
+#include <vector>
+#include <memory>
+#include <user/logic.h>
+#include <mentalEntity.h>
+#include <concept.h>
+#include <user/stimFrame.h>
+
+namespace smo {
+namespace cologex {
+
+/**
+ * @brief Operand that carries a reference mentity to match against.
+ *
+ * Derives from both MentalEntity and logic::Operand and adds a single public
+ * member, `reference`.
+ */
+class Comparator
+:	public MentalEntity, public logic::Operand
+{
+public:
+	/**		EXPLANATION:
+	 * The reference for a Comparator is the fixed mentity or range of mentities
+	 * that this comparator is intended to validate a match against.
+	 *
+	 * There are several mentities against which a comparator can match. At the
+	 * time of writing, we're fairly sure that these will be at minimum,
+	 * qualia, chronomena and mentena.
+	 */
+	std::shared_ptr<MentalEntity> reference;
+};
+
+/**
+ * @brief Unary logic expression whose sole operand is a Comparator.
+ */
+class ComparatorExpression
+:	public logic::UnaryExpression
+{
+public:
+	/**
+	 * @param op Operator forwarded to logic::UnaryExpression.
+	 * @param comparator Comparator to use as the operand; it is upcast to
+	 *	logic::Operand before being forwarded. Taken by const reference so
+	 *	that temporaries (e.g. the result of std::make_shared) bind as well
+	 *	as named shared_ptrs.
+	 */
+	ComparatorExpression(
+		logic::Operator &op, const std::shared_ptr<Comparator> &comparator
+	)
+	:	logic::UnaryExpression(
+			op, std::static_pointer_cast<logic::Operand>(comparator))
+	{}
+};
+
+/**
+ * @brief Type combining MentalEntity, logic::Expression and Concept.
+ *
+ * Currently declares no members of its own beyond what it inherits.
+ */
+class CombinatorialLogicExpression
+:	public MentalEntity, public logic::Expression, public Concept
+{
+public:
+
+};
+
+/**
+ * @brief Collection of CombinatorialLogicExpressions, each paired with a
+ * stim_buff::SimultaneityStamp.
+ */
+class CombinatorialLogicExpressionSeq
+:	public MentalEntity, public Concept
+{
+public:
+	// One (stamp, expression) pair per entry; expressions are held by value.
+	std::vector<
+		std::pair<stim_buff::SimultaneityStamp, CombinatorialLogicExpression>
+	> expressions;
+};
+
+// Short aliases ("cologex" = COmbinatorial LOGic EXpression).
+typedef CombinatorialLogicExpression Cologex;
+typedef CombinatorialLogicExpressionSeq CologexSeq;
+
+} // namespace cologex
+} // namespace smo
+
+#endif
@@ -0,0 +1,134 @@
+#ifndef _USER_COMPUTE_H
+#define _USER_COMPUTE_H
+
+#include <memory>
+#include <vector>
+#include <string_view>
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+
+namespace smo {
+namespace compute {
+
+/**
+ * @brief Helper function to validate OpenCL version
+ *
+ * Only the declaration is visible here; the notes below are inferred from the
+ * parameter names and should be confirmed against the definition.
+ *
+ * @param versionStr Version string to check (presumably as reported by OpenCL)
+ * @param versionType Presumably a label for the kind of version being checked
+ * @param minMajor Presumably the minimum acceptable major version
+ * @param minMinor Presumably the minimum acceptable minor version
+ * @return Presumably true when the version meets the minimum -- TODO confirm
+ */
+bool validateOpenClVersion(
+	std::string_view versionStr, std::string_view versionType,
+	int minMajor, int minMinor);
+
+/**
+ * @brief OpenCL compute device information
+ *
+ * Manages a single OpenCL device, creating and owning its context and command
+ * queue.
+ */
+class ComputeDevice
+{
+public:
+	/**
+	 * @brief Build a ComputeDevice for the given platform/device pair
+	 *
+	 * Sets up the OpenCL context and command queue for the device.
+	 * Throws std::runtime_error if either the context or the queue cannot
+	 * be created.
+	 *
+	 * @param platformId OpenCL platform ID
+	 * @param deviceId OpenCL device ID
+	 */
+	ComputeDevice(cl_platform_id platformId, cl_device_id deviceId);
+
+	/* Releases the command queue first and the context second, skipping any
+	 * handle that is null. Each released handle is nulled afterwards.
+	 */
+	~ComputeDevice()
+	{
+		if (commandQueue != nullptr)
+		{
+			clReleaseCommandQueue(commandQueue);
+			commandQueue = nullptr;
+		}
+
+		if (context != nullptr)
+		{
+			clReleaseContext(context);
+			context = nullptr;
+		}
+	}
+
+	// Copying is disabled: this object releases the handles it holds.
+	ComputeDevice(const ComputeDevice&) = delete;
+	ComputeDevice& operator=(const ComputeDevice&) = delete;
+
+	cl_platform_id platform;
+	cl_device_id device;
+	cl_context context;
+	cl_command_queue commandQueue;
+};
+
+/**
+ * @brief Association between an OpenCL buffer and a compute device
+ */
+struct ClBufferDeviceAssociation
+{
+	// Stores the handle and a copy of the shared_ptr; nothing else is done.
+	ClBufferDeviceAssociation(
+		cl_mem buf, const std::shared_ptr<ComputeDevice>& dev)
+	: buffer{buf}, device{dev}
+	{}
+
+	cl_mem buffer;                          // buffer handle
+	std::shared_ptr<ComputeDevice> device;  // device the handle is paired with
+};
+
+/**
+ * @brief OpenCL buffer created on all compute devices
+ *
+ * Manages a USE_HOST_PTR buffer created on all available compute devices.
+ * The constructor creates buffers for all devices, and the destructor releases
+ * them.
+ */
+class ClBuffer
+{
+public:
+	/**
+	 * @brief Build a ClBuffer, creating one buffer per device
+	 *
+	 * A USE_HOST_PTR buffer is created on the context of every device given.
+	 * Throws std::runtime_error if creation fails for any of them.
+	 *
+	 * @param hostPtr Host pointer to use
+	 * @param size Size of buffer in bytes
+	 * @param flags Additional OpenCL memory flags
+	 * @param devices Vector of compute devices to create buffers on
+	 */
+	ClBuffer(
+		void* hostPtr, size_t size, cl_mem_flags flags,
+		const std::vector<std::shared_ptr<ComputeDevice>>& devices);
+
+	/* Releases every non-null buffer handle held in `associations` and nulls
+	 * it afterwards.
+	 */
+	~ClBuffer()
+	{
+		for (ClBufferDeviceAssociation& assoc : associations)
+		{
+			if (!assoc.buffer) { continue; }
+
+			clReleaseMemObject(assoc.buffer);
+			assoc.buffer = nullptr;
+		}
+	}
+
+	// Copying is disabled: this object releases the handles it holds.
+	ClBuffer(const ClBuffer&) = delete;
+	ClBuffer& operator=(const ClBuffer&) = delete;
+
+	/**
+	 * @brief Look up the cl_mem handle belonging to a given compute device
+	 * @param device The compute device to find the buffer for
+	 * @return The cl_mem handle for the device, or nullptr if not found
+	 */
+	cl_mem getAssociatedBufferHandleForDevice(
+		const std::shared_ptr<ComputeDevice>& device) const;
+
+	void* hostPtr;
+	size_t size;
+	cl_mem_flags flags;
+	std::vector<ClBufferDeviceAssociation> associations;
+};
+
+} // namespace compute
+} // namespace smo
+
+#endif // _USER_COMPUTE_H
@@ -5,6 +5,8 @@
 #include <string>
 #include <iostream>
 #include <sstream>
+#include <algorithm>
+#include <stdexcept>

 namespace smo {
 namespace device {
@@ -23,6 +25,8 @@ public:
    {
        return deviceIdentifier == other.deviceIdentifier &&
               sensorType == other.sensorType &&
+               qualeIfaceApi == other.qualeIfaceApi &&
+               stimBuffApi == other.stimBuffApi &&
               provider == other.provider &&
               deviceSelector == other.deviceSelector;
    }
@@ -30,9 +34,10 @@ public:
 public:
    std::string deviceIdentifier;
    char sensorType;
-    std::string implexor;
-    std::string api;
-    std::vector<std::pair<std::string,std::string>> apiParams;
+    std::string qualeIfaceApi;
+    std::vector<std::pair<std::string,std::string>> qualeIfaceApiParams;
+    std::string stimBuffApi;
+    std::vector<std::pair<std::string,std::string>> stimBuffApiParams;
    std::string provider;
    std::vector<std::pair<std::string,std::string>> providerParams;
    std::string deviceSelector;
@@ -42,9 +47,18 @@ public:
        std::ostringstream os;
        os << "Device Identifier: " << deviceIdentifier
            << ", Sensor Type: " << sensorType
-            << ", Implexor: " << implexor << ", API: " << api
-            << ", API Params: (";
-        for (const auto& param : apiParams)
+            << ", QualeIface API: " << qualeIfaceApi << ", QualeIface API Params: (";
+        for (const auto& param : qualeIfaceApiParams)
+        {
+            os << param.first;
+            if (!param.second.empty()) {
+                os << "=" << param.second;
+            }
+            os << " ";
+        }
+        os << "), StimBuff API: " << stimBuffApi
+            << ", StimBuff API Params: (";
+        for (const auto& param : stimBuffApiParams)
        {
            os << param.first;
            if (!param.second.empty()) {
@@ -65,6 +79,71 @@ public:

        return os.str();
    }
+
+    /**
+     * @brief Parse a required integer parameter from a parameter list
+     * @param params The parameter vector to search in
+     * @param paramName The name of the parameter to parse
+     * @return The parsed integer value
+     * @throws std::runtime_error if parameter is not found or cannot be parsed
+     */
+    static int parseRequiredParamAsInt(
+		const std::vector<std::pair<std::string,std::string>>& params,
+		const std::string& paramName
+		)
+    {
+        // The first entry whose key matches is used; later duplicates are
+        // ignored.
+        auto it = std::find_if(
+            params.begin(),
+            params.end(),
+            [&paramName](const auto& param) {
+                return param.first == paramName;
+            }
+        );
+
+        if (it == params.end())
+        {
+            throw std::runtime_error(
+                "No " + paramName + " specified in params");
+        }
+
+        try {
+            std::size_t nParsed = 0;
+            const int value = std::stoi(it->second, &nParsed);
+
+            /* std::stoi() stops at the first character that can't be part of
+             * an integer and still reports success, so "12abc" would silently
+             * parse as 12. Treat anything other than trailing whitespace as a
+             * parse failure; the handler below rewraps it like any other.
+             */
+            if (it->second.find_first_not_of(" \t\r\n", nParsed)
+                != std::string::npos)
+            {
+                throw std::invalid_argument(
+                    "unexpected trailing characters");
+            }
+
+            return value;
+        } catch (const std::exception& e) {
+            // Covers std::invalid_argument and std::out_of_range from stoi.
+            throw std::runtime_error(
+                "Failed to parse '" + paramName + "' param value '"
+                + it->second + "' as integer: " + e.what());
+        }
+    }
+
+    /**
+     * @brief Parse an optional integer parameter from a parameter list using synonyms
+     * @param params The parameter vector to search in
+     * @param synonymNames The collection of synonymous parameter names to try
+     * @param defaultValue The default value to return if no parameter is found
+     * @return The parsed integer value, or defaultValue if none found
+     * @note Synonyms are tried in reverse order; lattermost synonym wins if multiple are present
+     */
+    static int parseOptionalParamAsIntWithSynonyms(
+		const std::vector<std::pair<std::string,std::string>>& params,
+		const std::vector<std::string>& synonymNames,
+		int defaultValue
+		)
+    {
+        /* Walk the synonym list back-to-front so that, when several synonyms
+         * are present in params, the one listed last takes precedence.
+         */
+        for (std::size_t remaining = synonymNames.size(); remaining > 0;
+            --remaining)
+        {
+            const std::string& candidate = synonymNames[remaining - 1];
+            try {
+                return parseRequiredParamAsInt(params, candidate);
+            } catch (const std::exception&) {
+                // Absent or unparsable: move on to the next synonym.
+            }
+        }
+
+        // None of the synonyms yielded a value.
+        return defaultValue;
+    }
 };

 class InteroceptorDevAttachmentSpec : public DeviceAttachmentSpec
@@ -0,0 +1,64 @@
+#ifndef _LIVOX_GEN1_FRAME_ASSEMBLY_DESC_H
+#define _LIVOX_GEN1_FRAME_ASSEMBLY_DESC_H
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+#include <string>
+#include <sstream>
+
+namespace smo {
+namespace stim_buff {
+
+/**
+ * Describes how a frame is laid out as a list of slots: the slot count and
+ * size, the frame stride, and a per-slot descriptor list.
+ */
+class FrameAssemblyDesc
+{
+public:
+	struct SlotDesc
+	{
+		size_t offsetBytes;      // offset from frame base
+		uint8_t* vaddr;          // direct pointer into StagingBuffer memory
+		size_t nBytes;           // slot capacity in bytes
+	};
+
+public:
+	/* The scalar members carry default initializers, so a default-constructed
+	 * descriptor is a well-defined "empty" one (all zeroes, no slots) rather
+	 * than holding indeterminate values.
+	 */
+	FrameAssemblyDesc() = default;
+
+	/**
+	 * @param n Value stored in numSlots (not checked against slotList.size())
+	 * @param slotSize Value stored in slotSizeBytes
+	 * @param frameStride Value stored in frameStrideBytes
+	 * @param slotList Slot descriptors; moved into `slots`
+	 */
+	FrameAssemblyDesc(
+		size_t n, size_t slotSize,
+		size_t frameStride,
+		std::vector<SlotDesc> slotList)
+		: numSlots(n), slotSizeBytes(slotSize),
+		frameStrideBytes(frameStride),
+		slots(std::move(slotList)) {}
+
+	/**
+	 * @return A one-line human-readable summary. At most the first four slots
+	 *	are printed; ", ..." is appended when there are more.
+	 */
+	inline std::string stringify() const {
+		std::ostringstream oss;
+		oss << "FrameAssemblyDesc{"
+			<< "numSlots=" << numSlots
+			<< ", slotSizeBytes=" << slotSizeBytes
+			<< ", frameStrideBytes=" << frameStrideBytes
+			<< ", slots=[";
+		const size_t preview = slots.size() < 4 ? slots.size() : 4;
+		for (size_t i = 0; i < preview; ++i) {
+			// Cast so the address is printed, not treated as a C string.
+			oss << "{off=" << slots[i].offsetBytes
+				<< ", nBytes=" << slots[i].nBytes
+				<< ", vaddr=" << static_cast<const void*>(slots[i].vaddr)
+				<< "}";
+			if (i + 1 < preview) oss << ",";
+		}
+		if (slots.size() > preview) oss << ", ...";
+		oss << "]}";
+		return oss.str();
+	}
+
+public:
+	size_t numSlots = 0;
+	size_t slotSizeBytes = 0;
+	size_t frameStrideBytes = 0;
+	std::vector<SlotDesc> slots;
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // _LIVOX_GEN1_FRAME_ASSEMBLY_DESC_H
+
@@ -4,19 +4,64 @@
 #include <stdbool.h>
 #include <optional>
 #include <string>
+#include <functional>
 #include <memory>
+#include <vector>
 #include <preprocessor.h>
 #include <user/deviceAttachmentSpec.h>
+#include <callback.h>
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+
+class OptionParser;

 namespace smo {
-namespace sense_api {
+
+class ComponentThread;
+
+namespace compute {
+class ClBuffer;
+class ComputeDevice;
+} // namespace compute
+
+namespace stim_buff {
+
+/**
+ * @brief Threading model descriptor for senseApi libraries.
+ *
+ * This structure provides senseApi libraries with access to the information and
+ * resources they need to operate with SMO's threading model.
+ */
+struct SmoThreadingModelDesc
+{
+	/**
+	 * @brief Shared pointer to the ComponentThread to use for
+	 * device-independent state management.
+	 *
+	 * This ComponentThread should be used by senseApis for state management
+	 * that's independent of any particular device or attachment spec.
+	 * SMO will usually pass in the Marionette thread here.
+	 *
+	 * State management that's tied to a particular attachment spec should be
+	 * done on the ComponentThread for the thread that SMO provided in the
+	 * attachDeviceReq call.
+	 */
+	std::shared_ptr<ComponentThread> componentThread;
+};
+
+typedef std::function<void(bool, std::shared_ptr<device::DeviceAttachmentSpec>)>
+	sal_mlo_attachDeviceReqCbFn;
+typedef std::function<void(bool, std::shared_ptr<device::DeviceAttachmentSpec>)>
+	sal_mlo_detachDeviceReqCbFn;

 typedef int (sal_mlo_initializeIndFn)(void);
 typedef int (sal_mlo_finalizeIndFn)(void);
-typedef int (sal_mlo_attachDeviceReqFn)(
-    const std::shared_ptr<device::DeviceAttachmentSpec>& desc);
-typedef int (sal_mlo_detachDeviceReqFn)(
-    const std::shared_ptr<device::DeviceAttachmentSpec>& desc);
+typedef void (sal_mlo_attachDeviceReqFn)(
+    const std::shared_ptr<device::DeviceAttachmentSpec>& desc,
+    const std::shared_ptr<ComponentThread>& componentThread,
+    Callback<sal_mlo_attachDeviceReqCbFn> cb);
+typedef void (sal_mlo_detachDeviceReqFn)(
+    const std::shared_ptr<device::DeviceAttachmentSpec>& desc,
+    Callback<sal_mlo_detachDeviceReqCbFn> cb);

 /**
 * @brief Hooks provided by Salmanoff to senseApi libraries.
@@ -24,7 +69,7 @@ typedef int (sal_mlo_detachDeviceReqFn)(
 * This structure contains function pointers that senseApi libraries can use
 * to interact with Salmanoff's functionality, such as searching for commonLibs.
 */
-struct SalmanoffCallbacks
+struct SmoCallbacks
 {
    /**
     * @brief Search for a library in Salmanoff's search paths
@@ -37,11 +82,61 @@ struct SalmanoffCallbacks
     */
    std::optional<std::string> (*searchForLibInSmoSearchPaths)(
        const std::string& libraryPath);
+
+    /**
+     * @brief Get the current ComponentThread instance
+     * @return Shared pointer to the current ComponentThread
+     *
+     * This function provides access to the current ComponentThread instance,
+     * equivalent to calling ComponentThread::getSelf().
+     */
+    std::shared_ptr<ComponentThread> (*ComponentThread_getSelf)(void);
+
+    /**
+     * @brief Get the OptionParser singleton instance
+     * @return Reference to the OptionParser singleton
+     *
+     * This function provides access to the OptionParser singleton instance,
+     * equivalent to calling OptionParser::getOptions().
+     */
+    OptionParser& (*OptionParser_getOptions)(void);
+
+    /**
+     * @brief Create a USE_HOST_PTR buffer on all OpenCL contexts
+     * @param hostPtr Host pointer to the memory
+     * @param size Size of the buffer in bytes
+     * @param flags Additional OpenCL memory flags
+     * @return Shared pointer to ClBuffer managing buffers on all devices
+     */
+    std::shared_ptr<smo::compute::ClBuffer>
+	(*ComputeManager_createUseHostPtrBuffer)(
+        void* hostPtr, size_t size, cl_mem_flags flags);
+
+    /**
+     * @brief Release USE_HOST_PTR buffers from all contexts
+     * @param buffer Shared pointer to ClBuffer to release
+     */
+    void (*ComputeManager_releaseUseHostPtrBuffer)(
+        std::shared_ptr<smo::compute::ClBuffer> buffer);
+
+    /**
+     * @brief Get a compute device
+     * @return Shared pointer to ComputeDevice, or nullptr if no devices available
+     */
+    std::shared_ptr<smo::compute::ComputeDevice>
+	(*ComputeManager_getDevice)(void);
+
+    /**
+     * @brief Release a compute device
+     * @param device Shared pointer to ComputeDevice to release
+     */
+    void (*ComputeManager_releaseDevice)(
+		std::shared_ptr<smo::compute::ComputeDevice> device);
 };

 struct Sal_Mgmt_LibOps
 {
-    /* When Salmanoff loads a sense API lib, it calls this function to initialize
+    /* When Salmanoff loads a stim buff API lib, it calls this function to initialize
     * the lib. When this returns, the lib should be ready to attach devices.
     */
    sal_mlo_initializeIndFn *initializeInd;
@@ -50,7 +145,7 @@ struct Sal_Mgmt_LibOps
     */
    sal_mlo_finalizeIndFn *finalizeInd;
    /* Salmanoff calls this to attach a device to the lib. When it returns, the
-     * device should be attached and ready to be implexed.
+     * device should be attached and ready to present its stimbuff.
     */
    sal_mlo_attachDeviceReqFn *attachDeviceReq;
    // When this returns, the device should be detached.
@@ -68,16 +163,16 @@ struct Sal_Mgmt_LibOps
    }
 };

-/* Exported by all sense API Libraries to tell Salmanoff what API the lib uses
- * to connect to providers; and also to state which implexor APIs it exports.
+/* Exported by all stim buff API Libraries to tell Salmanoff what API the lib uses
+ * to connect to providers; and also to state which quale-iface APIs it exports.
 */
-class SenseApiDesc
+class StimBuffApiDesc
 {
 public:
-    class ExportedImplexorApiDesc
+    class ExportedQualeIfaceApiDesc
    {
    public:
-        static bool sanityCheck(const ExportedImplexorApiDesc &desc)
+        static bool sanityCheck(const ExportedQualeIfaceApiDesc &desc)
        {
            if (desc.name.empty()) { return false; }
            return true;
@@ -91,54 +186,57 @@ public:
    std::string stringify() const
    {
        std::string result = "Name: " + name + "\n";
-        result += "Exported Implexor APIs:\n";
-        for (const auto& api : exportedImplexorApis) {
+        result += "Exported QualeIface APIs:\n";
+        for (const auto& api : exportedQualeIfaceApis) {
            result += "  - " + api.name + "\n";
        }
        return result;
    }

-    static bool sanityCheck(const SenseApiDesc &desc)
+    static bool sanityCheck(const StimBuffApiDesc &desc)
    {
-        if (desc.name.empty() || desc.exportedImplexorApis.empty()) {
+        if (desc.name.empty() || desc.exportedQualeIfaceApis.empty()) {
            return false;
        }

-        for (const auto& api : desc.exportedImplexorApis) {
-            if (!ExportedImplexorApiDesc::sanityCheck(api)) { return false; }
+        for (const auto& api : desc.exportedQualeIfaceApis) {
+            if (!ExportedQualeIfaceApiDesc::sanityCheck(api)) { return false; }
        }

        return Sal_Mgmt_LibOps::sanityCheck(desc.sal_mgmt_libOps);
    }

    std::string name;
-    // These are the implexors whose APIs this lib exports.
-    std::vector<ExportedImplexorApiDesc> exportedImplexorApis;
+    // These are the quale-iface APIs this lib exports.
+    std::vector<ExportedQualeIfaceApiDesc> exportedQualeIfaceApis;
    Sal_Mgmt_LibOps sal_mgmt_libOps;
 };


-#define SMO_GET_SENSE_API_DESC_FN_NAME getSenseApiDesc
-#define SMO_GET_SENSE_API_DESC_FN_NAME_STR \
-    SMO_QUOTE(SMO_GET_SENSE_API_DESC_FN_NAME)
-#define SMO_GET_SENSE_API_DESC_FN_TYPEDEF \
-    SMO_CONCAT(SMO_GET_SENSE_API_DESC_FN_NAME, Fn)
+#define SMO_GET_STIM_BUFF_API_DESC_FN_NAME getStimBuffApiDesc
+#define SMO_GET_STIM_BUFF_API_DESC_FN_NAME_STR \
+    SMO_QUOTE(SMO_GET_STIM_BUFF_API_DESC_FN_NAME)
+#define SMO_GET_STIM_BUFF_API_DESC_FN_TYPEDEF \
+    SMO_CONCAT(SMO_GET_STIM_BUFF_API_DESC_FN_NAME, Fn)

-/* Every Sense API library must define a global instance of this
+/* Every Stim Buff API library must define a global instance of this
 * function. Salmanoff will search for it and invoke it via dlsym().
 *
- * The function must return a SenseApiDesc struct that Smo will tell
- * Smo what implexors can be used with it & what APIs it exports.
- * The SenseApiDesc struct also gives Smo pointers to API functions
+ * The function must return a StimBuffApiDesc struct that tells Smo what
+ * quale-iface APIs can be used with it & what APIs it exports.
+ * The StimBuffApiDesc struct also gives Smo pointers to API functions
 * to invoke for communication between Smo and the library.
 *
- * The SalmanoffCallbacks parameter provides the library with access to
+ * The SmoCallbacks parameter provides the library with access to
 * Salmanoff's hooks.
+ * The SmoThreadingModelDesc parameter provides the library with the
+ * ComponentThread it should use for device-independent state management.
 */
-typedef const SenseApiDesc &(SMO_GET_SENSE_API_DESC_FN_TYPEDEF)(
-    const SalmanoffCallbacks& callbacks);
+typedef const StimBuffApiDesc &(SMO_GET_STIM_BUFF_API_DESC_FN_TYPEDEF)(
+    const SmoCallbacks& callbacks,
+    const SmoThreadingModelDesc& threadingModel);

-} // namespace sense_api
+} // namespace stim_buff
 } // namespace smo

 #endif // __USER_SENSE_API_LIB_H__
@@ -0,0 +1,76 @@
+#ifndef _SEQUENCE_LOCK_H
+#define _SEQUENCE_LOCK_H
+
+#include <atomic>
+#include <optional>
+
+namespace smo {
+
+/**
+ * @brief Sequence lock synchronization primitive
+ *
+ * A reader-writer synchronization primitive where writers increment the
+ * sequence number (odd = writing in progress, even = stable) and readers
+ * check the sequence number to detect concurrent modifications.
+ */
+class SequenceLock
+{
+public:
+	SequenceLock()
+	: sequenceNo(0)
+	{}
+
+	~SequenceLock() = default;
+
+	// Non-copyable, non-movable (std::atomic is neither copyable nor movable)
+	SequenceLock(const SequenceLock&) = delete;
+	SequenceLock& operator=(const SequenceLock&) = delete;
+	SequenceLock(SequenceLock&&) = delete;
+	SequenceLock& operator=(SequenceLock&&) = delete;
+
+	/* Atomically increments sequenceNo, making it odd to indicate that a
+	 * write is in progress, then issues a release fence.
+	 *
+	 * The fence is what orders the protected data stores that FOLLOW this
+	 * call after the increment. A release ordering on the increment itself
+	 * only orders operations that come BEFORE it, so on its own it would let
+	 * the data stores become visible before the odd sequence number does.
+	 *
+	 * NOTE: this class provides no mutual exclusion between writers; nothing
+	 * here stops two concurrent writeAcquire() calls from both proceeding.
+	 */
+	void writeAcquire()
+	{
+		sequenceNo.fetch_add(1, std::memory_order_release);
+		std::atomic_thread_fence(std::memory_order_release);
+	}
+
+	/* Atomically increments sequenceNo with release ordering.
+	 * Makes the sequence number even again, indicating write is complete.
+	 * The release ordering publishes the data stores made since
+	 * writeAcquire() to any reader that observes the new even value.
+	 */
+	void writeRelease()
+		{ sequenceNo.fetch_add(1, std::memory_order_release); }
+
+	/* Loads the sequence number with acquire ordering and checks if it is
+	 * even (stable state). If odd (writer active), returns nullopt. Otherwise
+	 * returns the sequence number.
+	 *
+	 * @return std::nullopt if writer is active, otherwise the sequence number
+	 */
+	std::optional<size_t> readAcquire() const
+	{
+		const size_t seq = sequenceNo.load(std::memory_order_acquire);
+		if (seq & 1) {
+			return std::nullopt;
+		}
+		return seq;
+	}
+
+	/* Issues an acquire fence, then checks if the sequence number matches
+	 * the original value from readAcquire(). If equal, the read was consistent.
+	 *
+	 * The fence is what orders the protected data loads that PRECEDE this
+	 * call before the validating load. An acquire ordering on the load itself
+	 * only orders operations that come AFTER it, so on its own it would let
+	 * the data loads be satisfied after the sequence number was re-checked.
+	 *
+	 * @param originalSequenceNo The sequence number obtained from readAcquire()
+	 * @return true if read was consistent, false if writer modified during read
+	 */
+	bool readRelease(size_t originalSequenceNo) const
+	{
+		std::atomic_thread_fence(std::memory_order_acquire);
+		const size_t seq = sequenceNo.load(std::memory_order_acquire);
+		return seq == originalSequenceNo;
+	}
+
+private:
+	std::atomic<size_t> sequenceNo;
+};
+
+} // namespace smo
+
+#endif // _SEQUENCE_LOCK_H
@@ -0,0 +1,157 @@
+#ifndef _SP_MC_RING_BUFFER_H
+#define _SP_MC_RING_BUFFER_H
+
+#include <vector>
+#include <cstddef>
+#include <stdexcept>
+#include <string>
+#include <new>
+#include <memory>
+#include <user/stimulusFrame.h>
+#include <user/frameAssemblyDesc.h>
+#include <user/sequenceLock.h>
+#include <user/senseApiDesc.h>
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+
+namespace smo {
+namespace stim_buff {
+
+/**
+ * @brief Single-producer, multi-consumer ring buffer w/per-slot sequence locks
+ *
+ * Owns one StimulusFrame per slot described by a FrameAssemblyDesc. Each
+ * frame carries its own SequenceLock (see getSequenceLockAtSlot()) so that
+ * consumers can validate their reads without blocking the producer. The
+ * payload memory itself is described by the FrameAssemblyDesc rather than
+ * being stored inside this object.
+ */
+class SpMcRingBuffer
+{
+public:
+	/**	EXPLANATION:
+	 * Builds one StimulusFrame per slot in frameAssemblyDesc_.
+	 *
+	 * StimulusFrame cannot be moved, so the vector is first sized with
+	 * default-constructed frames and each element is then rebuilt in place
+	 * with the parameterized constructor.
+	 *
+	 * @param frameAssemblyDesc_ Slot layout; must be non-null and non-empty
+	 * @param callbacks Forwarded to every StimulusFrame
+	 * @param flags Forwarded to every StimulusFrame
+	 * @throws std::invalid_argument if the descriptor is null or has no slots
+	 * @throws whatever StimulusFrame's constructor throws
+	 */
+	explicit SpMcRingBuffer(
+		const std::shared_ptr<FrameAssemblyDesc> &frameAssemblyDesc_,
+		const SmoCallbacks& callbacks,
+		cl_mem_flags flags)
+	:
+	nBuffers(frameAssemblyDesc_ ? frameAssemblyDesc_->slots.size() : 0),
+	frameAssemblyDesc(frameAssemblyDesc_),
+	slots(nBuffers),  // Default-construct all frames
+	producerNextUsableIndex(0)
+	{
+		if (!frameAssemblyDesc)
+		{
+			throw std::invalid_argument(std::string(__func__)
+				+ ": SpMcRingBuffer: frameAssemblyDesc must not be null");
+		}
+
+		if (nBuffers == 0)
+		{
+			throw std::invalid_argument(std::string(__func__)
+				+ ": SpMcRingBuffer: frameAssemblyDesc must have at least one "
+				"slot");
+		}
+
+		// Re-invoke constructors w/placement new on default-constructed frames
+		for (size_t i = 0; i < nBuffers; ++i)
+		{
+			StimulusFrame *slot = &slots[i];
+
+			slot->~StimulusFrame();  // Destroy default-constructed object
+			try
+			{
+				new (slot) StimulusFrame(
+					frameAssemblyDesc->slots[i], callbacks, flags, i);
+			}
+			catch (...)
+			{
+				/* The vector still owns this element and will run its
+				 * destructor while the exception unwinds. Put a live
+				 * default-constructed frame back so that destructor does
+				 * not run on an already-destroyed object.
+				 */
+				new (slot) StimulusFrame();
+				throw;
+			}
+		}
+	}
+
+	~SpMcRingBuffer() = default;
+
+	// Non-copyable, non-movable (slots are non-movable)
+	SpMcRingBuffer(const SpMcRingBuffer&) = delete;
+	SpMcRingBuffer& operator=(const SpMcRingBuffer&) = delete;
+	SpMcRingBuffer(SpMcRingBuffer&&) = delete;
+	SpMcRingBuffer& operator=(SpMcRingBuffer&&) = delete;
+
+public:
+	/**
+	 * @brief Get a reference to the StimulusFrame at the specified slot
+	 *
+	 * @param slotIndex The index of the slot (0-based)
+	 * @return Reference to StimulusFrame at the slot
+	 * @throws std::out_of_range if slotIndex >= nBuffers
+	 */
+	StimulusFrame& getDataAtSlot(size_t slotIndex)
+	{
+		if (slotIndex >= nBuffers)
+		{
+			throw std::out_of_range(std::string(__func__)
+				+ ": SpMcRingBuffer: slotIndex must be < nBuffers");
+		}
+		return slots[slotIndex];
+	}
+
+	/**
+	 * @brief Get the sequence lock guarding the frame at the specified slot
+	 *
+	 * @param slotIndex The index of the slot (0-based)
+	 * @return Reference to the SequenceLock embedded in that slot's frame
+	 * @throws std::out_of_range if slotIndex >= nBuffers
+	 */
+	SequenceLock& getSequenceLockAtSlot(size_t slotIndex)
+	{
+		if (slotIndex >= nBuffers)
+		{
+			throw std::out_of_range(std::string(__func__)
+				+ ": SpMcRingBuffer: slotIndex must be < nBuffers");
+		}
+		return slots[slotIndex].lock;
+	}
+
+	/**
+	 * @brief Get the next index to produce into and advance the cursor
+	 *
+	 * Returns the current producer index and stores (index + 1) % nBuffers
+	 * as the next one. The update is bracketed by the producer-index
+	 * sequence lock. That lock only marks the update as in progress; it
+	 * does not serialize callers, so this must only be called by the
+	 * single producer.
+	 *
+	 * @return The index to produce into (always < nBuffers)
+	 */
+	size_t getIndexToProduceInto()
+	{
+		producerNextUsableIndexLock.writeAcquire();
+		size_t currentIndex = producerNextUsableIndex;
+		size_t nextIndex = (currentIndex + 1) % nBuffers;
+		producerNextUsableIndex = nextIndex;
+		producerNextUsableIndexLock.writeRelease();
+		return currentIndex;
+	}
+
+	/**
+	 * @brief Abort production by setting the producer index to a specific value
+	 *
+	 * Like getIndexToProduceInto(), this must only be called by the single
+	 * producer.
+	 *
+	 * @param index The index to set (must be < nBuffers)
+	 * @throws std::out_of_range if index >= nBuffers
+	 */
+	void abortProduction(size_t index)
+	{
+		if (index >= nBuffers)
+		{
+			throw std::out_of_range(std::string(__func__)
+				+ ": SpMcRingBuffer: index must be < nBuffers");
+		}
+		producerNextUsableIndexLock.writeAcquire();
+		producerNextUsableIndex = index;
+		producerNextUsableIndexLock.writeRelease();
+	}
+
+public:
+	// Layout/invariants: number of slots; fixed at construction.
+	// Declared first because `slots` is sized from it in the initializer list.
+	size_t nBuffers;
+
+private:
+	// FrameAssemblyDesc describing the memory layout
+	std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
+	// Frames vector: each frame contains a sequence lock and SlotDesc
+	std::vector<StimulusFrame> slots;
+	// Brackets every update of producerNextUsableIndex.
+	SequenceLock producerNextUsableIndexLock;
+	// Slot the producer will be handed next; always < nBuffers.
+	size_t producerNextUsableIndex;
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // _SP_MC_RING_BUFFER_H
+
@@ -0,0 +1,199 @@
+#ifndef STAGINGBUFFER_H
+#define STAGINGBUFFER_H
+
+#include <memory>
+#include <cstdint>
+#include <atomic>
+#include <string>
+#include <sstream>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+namespace smo {
+namespace stim_buff {
+
+// Forward declaration
+class FrameAssemblyDesc;
+
+/**
+ * StagingBuffer manages a large buffer to guide io_uring in assembling some
+ * number of Livox Avia pcloud UDP dgrams into a single stim frame.
+ *
+ * The buffer operates in a cycle:
+ * 1. io_uring assembles UDP dgrams into the buffer until it's full
+ * 2. Buffer is handed off to the stimbuff layer to be appended to the stimbuff.
+ * 3. When the stimbuff layer has appended the current assembled frame, the
+ *		assembly buffer is reset and cycle repeats.
+ */
+class StagingBuffer
+{
+public:
+	/**	EXPLANATION:
+	 * Alignment and padding requirements that an I/O engine places on the
+	 * slots and on the frame as a whole. All values are in bytes.
+	 */
+	class IOEngineConstraints
+	{
+	public:
+		// Default constructor yields all-zero constraints.
+		IOEngineConstraints() = default;
+
+		IOEngineConstraints(
+			size_t slotStartAlignmentByteVal_,
+			size_t slotPadToNBytes_,
+			size_t frameStartAlignmentByteVal_,
+			size_t framePadToNBytes_)
+		: slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
+		slotPadToNBytes(slotPadToNBytes_),
+		frameStartAlignmentByteVal(frameStartAlignmentByteVal_),
+		framePadToNBytes(framePadToNBytes_)
+		{}
+
+		~IOEngineConstraints() = default;
+
+		// Zero-initialized so that a default-constructed object can be
+		// copied and stringified without reading indeterminate values.
+		size_t slotStartAlignmentByteVal = 0, slotPadToNBytes = 0,
+			frameStartAlignmentByteVal = 0, framePadToNBytes = 0;
+
+		// Static defaults for io_uring and OpenCL
+		static const IOEngineConstraints ioUringConstraints;
+		static const IOEngineConstraints openClInputConstraints;
+
+		// Renders all four constraint values for logging.
+		inline std::string stringify() const
+		{
+			std::ostringstream oss;
+			oss << "IOEngineConstraints{"
+				<< "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
+				<< ", slotPadToNBytes=" << slotPadToNBytes
+				<< ", frameStartAlignmentByteVal=" << frameStartAlignmentByteVal
+				<< ", framePadToNBytes=" << framePadToNBytes
+				<< "}";
+			return oss.str();
+		}
+	};
+
+public:
+	/**	EXPLANATION:
+	 * Default constructor creates an empty buffer: no storage, all sizes
+	 * zero, not assembling. It must be re-initialized using placement new
+	 * with the parameterized constructor before it is used for I/O.
+	 */
+	StagingBuffer() = default;
+
+	/**	EXPLANATION:
+	 * We use the input and output engine constraints to determine the total
+	 * amount of memory required internally to assemble a single frame with
+	 * the given number of points per frame.
+	 */
+	explicit StagingBuffer(
+		const IOEngineConstraints& inputEngineConstraints,
+		const IOEngineConstraints& outputEngineConstraints,
+		size_t nSlots);
+	~StagingBuffer() = default;
+
+	/* Non-copyable. The move operations are declared defaulted, but the
+	 * std::atomic members are not movable, so the compiler defines both of
+	 * them as deleted: in practice this type is not movable either.
+	 */
+	StagingBuffer(const StagingBuffer&) = delete;
+	StagingBuffer& operator=(const StagingBuffer&) = delete;
+	StagingBuffer(StagingBuffer&&) = default;
+	StagingBuffer& operator=(StagingBuffer&&) = default;
+
+public:
+	/**	EXPLANATION:
+	 * Returns an input-engine-agnostic descriptor describing per-frame packet
+	 * slot layout. Different input engines should be able to convert this into
+	 * engine-specific metadata. E.g: io_uring's SQE descriptor.
+	 */
+	operator std::shared_ptr<FrameAssemblyDesc>() const { return frameDesc; }
+	// operator OpenClSharedBufferDescriptor() const;
+
+	// Assembly-in-progress flag; plain atomic load/store, no other state.
+	bool isAssembling() const { return assemblingFlag.load(); }
+	void startAssembly() { assemblingFlag.store(true); }
+	void stopAssembly() { assemblingFlag.store(false); }
+
+	/**	EXPLANATION:
+	 * Returns an iovec for io_uring registration.
+	 * The buffer is mmap()-allocated and suitable for IORING_REGISTER_BUFFERS.
+	 * Covers the whole buffer, from its first byte.
+	 */
+	struct iovec getIoUringRegisterIoVec() const
+	{
+		struct iovec iov;
+		iov.iov_base = buffer.get();
+		iov.iov_len = bufferNBytes;
+		return iov;
+	}
+
+	/**	EXPLANATION:
+	 * Returns an iovec for OpenCL engine buffer access.
+	 * The buffer is mmap()-allocated and suitable for CL_MEM_USE_HOST_PTR.
+	 * Returns pointer to first slot (offset by firstSlotOffsetNBytes) and
+	 * size from first slot to end of buffer.
+	 */
+	struct iovec getClEngineIovec() const
+	{
+		struct iovec iov;
+		iov.iov_base = buffer.get() + firstSlotOffsetNBytes;
+		iov.iov_len = bufferNBytes - firstSlotOffsetNBytes;
+		return iov;
+	}
+
+	// Renders the layout fields and input constraints for logging.
+	inline std::string stringify() const
+	{
+		std::ostringstream oss;
+		oss << "StagingBuffer{"
+			<< "nSlots=" << nSlots
+			<< ", bufferNBytes=" << bufferNBytes
+			<< ", slotStrideNBytes=" << slotStrideNBytes
+			<< ", constraints=" << inputConstraints.stringify()
+			<< "}";
+		return oss.str();
+	}
+
+private:
+	void computeSlotStrideAndBufferSize();
+	static size_t calculateFirstSlotOffsetAndValidate(
+		uint8_t* buffer,
+		size_t bufferNBytes,
+		size_t nSlots,
+		size_t slotStrideNBytes,
+		const IOEngineConstraints& inputConstraints);
+
+	// Custom deleter for mmap-allocated buffer
+	struct MmapDeleter
+	{
+		size_t size;
+		// Default constructor for use with default-constructed StagingBuffer
+		MmapDeleter() : size(0) {}
+		MmapDeleter(size_t s) : size(s) {}
+
+		// Unlocks and unmaps the region; a null pointer or zero size is a
+		// no-op.
+		void operator()(uint8_t* ptr) const
+		{
+			if (ptr != nullptr && size > 0)
+			{
+				munlock(ptr, size);
+				munmap(ptr, size);
+			}
+		}
+	};
+
+	// Buffer data - mmap-allocated for io_uring registration
+	// Using unique_ptr<uint8_t, MmapDeleter> instead of array syntax
+	// since we have a custom deleter that knows the size
+	std::unique_ptr<uint8_t, MmapDeleter> buffer;
+	size_t bufferNBytes = 0;
+
+	// Layout/invariants
+	size_t nSlots = 0;
+
+public:
+	size_t slotStrideNBytes = 0;
+	size_t firstSlotOffsetNBytes = 0;  // offset from buffer start to first slot
+
+private:
+	IOEngineConstraints inputConstraints;
+
+	// Descriptor (computed once; reused across frames)
+	mutable std::shared_ptr<FrameAssemblyDesc> frameDesc;
+
+	// Current state. Explicitly initialized: a default-initialized
+	// std::atomic holds an indeterminate value before C++20.
+	std::atomic<size_t> currentNBytes{0};
+	std::atomic<bool> assemblingFlag{false};
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // STAGINGBUFFER_H
@@ -0,0 +1,83 @@
+#ifndef _STENCIL_H
+#define _STENCIL_H
+
+#include <vector>
+#include <memory>
+#include <user/stimFrame.h>
+#include <mentalEntity.h>
+
+namespace smo {
+namespace cologex {
+
+/**
+ * Stencil represents range descriptions for sub-regions of sensor data frames.
+ *
+ * When a sensor yields frames with multiple values per frame, the Stencil class
+ * allows the stimbufflib driver to describe the subset of the input data that
+ * is relevant to SMO. For example:
+ *
+ * * A HSB format camera might treat brightness values above 128 as
+ *   negtrins, creating a Stencil that denotes all offsets in a
+ *   frame that exceed 128.
+ *
+ * * A lidar yielding XYZI[ntensity] might consider I values exceeding 128 to be
+ *   negtrins, creating a Stencil listing all values in the point
+ *   cloud that exceed 128.
+ *
+ * The Stencil internally represents offsets with ranges or other efficient
+ * formats to describe offsets (e.g., by row). The internal format is opaque to
+ * the stimbufflib, which describes relevant ranges by calling Stencil methods.
+ *
+ * This base class is abstract: it only stores the frame and the threshold.
+ * All analysis and storage is supplied by derived classes.
+ */
+class Stencil
+{
+public:
+	/**
+	 * Stores the frame and threshold for later analysis. The constructor
+	 * itself performs no scan; derived classes do the work in
+	 * analyzeFrame() and buildStencilMetadata().
+	 *
+	 * @param frame Shared pointer to the StimFrame to analyze
+	 * @param threshold The threshold value for determining relevant data
+	 */
+	Stencil(
+		const std::shared_ptr<stim_buff::StimFrame> &frame,
+		const uint32_t threshold)
+	// Listed in declaration order, which is the order they are initialized in.
+	: threshold(threshold), frame(frame)
+	{}
+	virtual ~Stencil() = default;
+
+	/**
+	 * Pure virtual method for derived classes to implement their specific
+	 * threshold analysis logic. Returns true if there are values above threshold,
+	 * false otherwise.
+	 */
+	virtual bool analyzeFrame() = 0;
+
+	/**
+	 * Stencil is constructed from a StimFrame. If the input StimFrame had no
+	 * values above threshold, then the Stencil will have no data.
+	 */
+	virtual bool hasData() const = 0;
+	// Truthiness mirrors hasData().
+	operator bool() const { return hasData(); }
+	bool operator!() const { return !hasData(); }
+
+	// Return the number of relevant ranges/offsets in this Stencil.
+	virtual size_t getRelevantCount() const = 0;
+	// Return true if the offset is relevant, false otherwise
+	virtual bool isRelevant(size_t offset) const = 0;
+
+	/**
+	 * Build internal stencil metadata from the shared_ptr member to describe
+	 * the range of StimFrame values that are relevant.
+	 */
+	virtual bool buildStencilMetadata() = 0;
+
+protected:
+	// Cut-off used by derived classes to decide which values are relevant.
+	uint32_t threshold;
+	// Frame under analysis; shared ownership keeps it alive with the Stencil.
+	std::shared_ptr<stim_buff::StimFrame> frame;
+};
+
+} // namespace cologex
+} // namespace smo
+
+#endif // _STENCIL_H
@@ -0,0 +1,71 @@
+#ifndef _STIMULUS_BUFFER_H
+#define _STIMULUS_BUFFER_H
+
+#include <config.h>
+#include <vector>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <user/spMcRingBuffer.h>
+#include <user/stagingBuffer.h>
+#include <user/frameAssemblyDesc.h>
+#include <user/senseApiDesc.h>
+#include "stimulusFrame.h"
+#include "deviceAttachmentSpec.h"
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+
+namespace smo {
+namespace stim_buff {
+
+// Forward declaration
+class StimulusProducer;
+
+/**
+ * StimulusBuffer manages a collection of stimulus frames and ring buffer.
+ *
+ * This buffer holds the actual frame storage and ring buffer for stimulus
+ * data. It maintains a reference to its parent StimulusProducer.
+ */
+class StimulusBuffer
+{
+public:
+	/**	EXPLANATION:
+	 * Builds the staging buffer first and then the ring buffer on top of
+	 * the staging buffer's FrameAssemblyDesc. The number of slots is
+	 * histbuffMs / CONFIG_STIMBUFF_FRAME_PERIOD_MS (integer division).
+	 *
+	 * @param parent Producer that owns this buffer; held by reference
+	 * @param deviceAttachmentSpec Spec this buffer was created for
+	 * @param histbuffMs History window in milliseconds; must not be negative
+	 * @param inputEngineConstraints Forwarded to StagingBuffer
+	 * @param outputEngineConstraints Forwarded to StagingBuffer
+	 * @param callbacks Forwarded to SpMcRingBuffer
+	 * @param flags Forwarded to SpMcRingBuffer
+	 * @throws std::invalid_argument if histbuffMs is negative
+	 */
+	explicit StimulusBuffer(
+		StimulusProducer& parent,
+		const std::shared_ptr<device::DeviceAttachmentSpec>
+			&deviceAttachmentSpec,
+		int histbuffMs,
+		const StagingBuffer::IOEngineConstraints& inputEngineConstraints,
+		const StagingBuffer::IOEngineConstraints& outputEngineConstraints,
+		const SmoCallbacks& callbacks,
+		cl_mem_flags flags)
+	: parent(parent),
+	deviceAttachmentSpec(deviceAttachmentSpec),
+	histbuffMs(histbuffMs),
+	stagingBuffer(
+		inputEngineConstraints,
+		outputEngineConstraints,
+		histbuffMsToNSlots(histbuffMs)),
+	ringBuffer(
+		static_cast<std::shared_ptr<FrameAssemblyDesc>>(stagingBuffer),
+		callbacks, flags)
+	{}
+
+	virtual ~StimulusBuffer() = default;
+
+	/* Non-copyable. The move operations are declared defaulted, but the
+	 * reference member and the SpMcRingBuffer member (whose moves are
+	 * deleted) cause the compiler to define them as deleted as well.
+	 */
+	StimulusBuffer(const StimulusBuffer&) = delete;
+	StimulusBuffer& operator=(const StimulusBuffer&) = delete;
+	StimulusBuffer(StimulusBuffer&&) = default;
+	StimulusBuffer& operator=(StimulusBuffer&&) = default;
+
+public:
+	StimulusProducer& parent;
+	std::shared_ptr<device::DeviceAttachmentSpec> deviceAttachmentSpec;
+	int histbuffMs;
+	// Declared before ringBuffer: the ring buffer is built from its descriptor.
+	StagingBuffer stagingBuffer;
+	SpMcRingBuffer ringBuffer;
+
+private:
+	/* Converts the history window into a slot count. A negative window is
+	 * rejected here, because casting a negative int to size_t would
+	 * otherwise request an enormous number of slots.
+	 */
+	static size_t histbuffMsToNSlots(int histbuffMs)
+	{
+		if (histbuffMs < 0)
+		{
+			throw std::invalid_argument(std::string(__func__)
+				+ ": StimulusBuffer: histbuffMs must not be negative");
+		}
+		return static_cast<size_t>(
+			histbuffMs / CONFIG_STIMBUFF_FRAME_PERIOD_MS);
+	}
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // _STIMULUS_BUFFER_H
@@ -0,0 +1,123 @@
+#ifndef _ATTACHMENT_SUPPORT_STIMULUS_FRAME_H
+#define _ATTACHMENT_SUPPORT_STIMULUS_FRAME_H
+
+#include <cstdint>
+#include <memory>
+#include <user/frameAssemblyDesc.h>
+#include <user/sequenceLock.h>
+#include <user/compute.h>
+#include <user/senseApiDesc.h>
+#define CL_TARGET_OPENCL_VERSION 120
+#include <CL/cl.h>
+
+namespace smo {
+namespace stim_buff {
+
+/**	EXPLANATION:
+ * A simultaneity stamp is a timestamp that is used to determine whether two
+ * stimulus frames occurred simultaneously. Its purpose is adamantly *NOT* to
+ * record or denote the "absolute" time of the stimulus frames. I cannot stress
+ * this enough. The simultaneity stamp is NOT used to record "when" the stimulus
+ * frame occurred. It is used *SOLELY* to record that two or more stimulus frames
+ * occurred at the same time.
+ *
+ * The SMO has absolutely no notion of "absolute" time. It only has a notion of
+ * simultaneity among stimulus frames. Any notions of "absolute" time are built
+ * up consciously and volitionally by the running mind, and not baked into the
+ * underlying software (i.e: Salmanoff).
+ *
+ * We need about 36 bits of unique simultaneity per year, assuming that we only
+ * expect to capture 1000 stim frames per second. 1000 is a lot of stim frames
+ * per second. If we use a 64 bit integer, that leaves us with 2^28 years
+ * before our simultaneity stamps roll over. That's roughly 268 million years.
+ *
+ * The calculation we used to arrive at 36 bits is as follows:
+ *	hex(86400 * 400 * 1000) = 0x80befc000
+ *	* 86400 = seconds per day.
+ *	* 400 = days per year.
+ *	* 1000 = stim frames per second.
+ * As you can see, our extremely cautious calculation resulted in 36 bits.
+ * If we use a UUID (128 bits), we can basically be fairly sure we won't
+ * roll over for ...aeons. Now the question is: should we use a UUID or a 64 bit
+ * integer?
+ *
+ * It's important to note that simultaneity stamps are not used in all mental
+ * entities. They're only used in raw chronomena recordings, and possibly
+ * also in artificed memory chronomena. Among the artificed chronomena, their
+ * simultaneity lifetime is usually self-contained. Only the raw, observed
+ * chronomena have to retain a lifetime that is basically "the person's
+ * lifespan" (though not even necessarily that long).
+ *
+ * It may not even necessarily need to be lifespan-unique because the purpose of
+ * simultaneity stamps is to denote simultaneity among the stim frames that are
+ * __actually stored__ in the mind's memories. So if we forgot all stim frames
+ * with simultaneity stamps that are older than, say, 1000, then we can re-use
+ * all the simultaneity stamps that are numerically less than 1000. So there's
+ * some dynamic recycling, and we can probably keep track of the oldest
+ * simultaneity stamp that we are currently using.
+ *
+ * Also, since simultaneity stamps are *NOT* used to record "when" the stimulus
+ * frame occurred, we can also periodically run a reclaiming daemon process on
+ * our stored memories, which will try to defragment the simultaneity stamps
+ * in use by currently stored chronomena. Or we can silently mutate the
+ * simultaneity stamps of chronomena when committing them to backing storage;
+ * as well as when loading them from backing storage.
+ */
+typedef uint64_t SimultaneityStamp;
+
+/**	EXPLANATION:
+ * One ring-buffer slot: the slot's memory description, the compute buffer
+ * created over that memory, the sequence lock guarding it, and the
+ * simultaneity stamp of the frame it currently holds.
+ */
+class StimulusFrame
+{
+public:
+	/**	EXPLANATION:
+	 * Default constructor creates an empty placeholder frame (no clBuffer,
+	 * stamp and index zero). It must be properly initialized using placement
+	 * new with the parameterized constructor before use.
+	 */
+	StimulusFrame() = default;
+
+	/**	EXPLANATION:
+	 * Creates the compute buffer for this slot through the
+	 * ComputeManager_createUseHostPtrBuffer callback, passing the slot's
+	 * address and size.
+	 *
+	 * @param slotDesc_ Address and size of the slot's memory
+	 * @param callbacks Must provide ComputeManager_createUseHostPtrBuffer
+	 * @param flags Passed through to the buffer-creation callback
+	 * @param ringBufferIndex_ Position of this frame in its ring buffer
+	 * @throws std::runtime_error if the callback is null or returns null
+	 */
+	StimulusFrame(
+		const FrameAssemblyDesc::SlotDesc& slotDesc_,
+		const SmoCallbacks& callbacks,
+		cl_mem_flags flags,
+		size_t ringBufferIndex_)
+	: slotDesc(slotDesc_),
+	ringBufferIndex(ringBufferIndex_)
+	{
+		if (!callbacks.ComputeManager_createUseHostPtrBuffer)
+		{
+			throw std::runtime_error(std::string(__func__)
+				+ ": StimulusFrame: ComputeManager_createUseHostPtrBuffer "
+				"callback is null");
+		}
+
+		clBuffer = callbacks.ComputeManager_createUseHostPtrBuffer(
+			slotDesc.vaddr, slotDesc.nBytes, flags);
+
+		if (!clBuffer)
+		{
+			throw std::runtime_error(std::string(__func__)
+				+ ": StimulusFrame: failed to create clBuffer");
+		}
+
+//		std::cout << __func__ << ": StimulusFrame: created clBuffer with size " << slotDesc.nBytes << " bytes @ " << (const void*)slotDesc.vaddr << std::endl;
+	}
+
+	~StimulusFrame() = default;
+
+	/* Non-copyable. The move operations are declared defaulted, but the
+	 * SequenceLock member is not movable, so the compiler defines both of
+	 * them as deleted: in practice this type is not movable either.
+	 */
+	StimulusFrame(const StimulusFrame&) = delete;
+	StimulusFrame& operator=(const StimulusFrame&) = delete;
+	StimulusFrame(StimulusFrame&&) = default;
+	StimulusFrame& operator=(StimulusFrame&&) = default;
+
+public:
+	// Guards the contents of this slot.
+	SequenceLock lock;
+	// Zero until a stamp is assigned; neither constructor sets it otherwise.
+	SimultaneityStamp simultaneityStamp = 0;
+	FrameAssemblyDesc::SlotDesc slotDesc;
+	std::shared_ptr<smo::compute::ClBuffer> clBuffer;
+	size_t ringBufferIndex = 0;
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // _ATTACHMENT_SUPPORT_STIMULUS_FRAME_H
@@ -0,0 +1,119 @@
+#ifndef _STIMULUS_PRODUCER_H
+#define _STIMULUS_PRODUCER_H
+
+#include <boostAsioLinkageFix.h>
+#include <vector>
+#include <memory>
+#include <cstdint>
+#include <atomic>
+#include <mutex>
+#include <functional>
+#include <iostream>
+#include <chrono>
+#include <config.h>
+#include <boost/asio/io_service.hpp>
+#include <boost/asio/deadline_timer.hpp>
+#include <spinLock.h>
+#include "deviceAttachmentSpec.h"
+
+namespace smo {
+namespace stim_buff {
+
+// Forward declaration
+class StimulusBuffer;
+
+/**
+ * StimulusProducer is the abstract base for objects that drive stimulus
+ * frame production for one attached device.
+ *
+ * It holds the device attachment spec, the list of StimulusBuffers attached
+ * to the producer, and a deadline timer bound to the supplied io_service.
+ * Derived classes supply the per-timeslice production work
+ * (stimFrameProductionTimesliceInd()) and buffer creation
+ * (getOrCreateAttachedStimulusBuffer()).
+ *
+ * NOTE(review): which of these operations are thread-safe cannot be
+ * determined from this header; confirm against the implementation.
+ */
+class StimulusProducer
+{
+public:
+	/**
+	 * @param deviceAttachmentSpec Spec of the device this producer serves
+	 * @param ioService_ io_service the timer is bound to; held by reference,
+	 *	so it must outlive this object
+	 */
+	explicit StimulusProducer(
+		const std::shared_ptr<device::DeviceAttachmentSpec>
+			&deviceAttachmentSpec,
+		boost::asio::io_service& ioService_)
+	: deviceAttachmentSpec(deviceAttachmentSpec),
+	ioService(ioService_),
+	// timer is built from the ioService member, which is declared before it.
+	shouldContinue(false), timer(ioService),
+	nDeferrals(0)
+	{}
+
+	virtual ~StimulusProducer() = default;
+
+	/* Non-copyable. NOTE(review): the move operations are declared
+	 * defaulted, but the reference member ioService means the defaulted
+	 * move assignment is defined as deleted; whether move construction is
+	 * usable depends on SpinLock, which is not visible here.
+	 */
+	StimulusProducer(const StimulusProducer&) = delete;
+	StimulusProducer& operator=(const StimulusProducer&) = delete;
+	StimulusProducer(StimulusProducer&&) = default;
+	StimulusProducer& operator=(StimulusProducer&&) = default;
+
+	// Control methods
+
+	/* Logs the device selector, sets shouldContinue, resets the deferral
+	 * count and schedules the first timeout with the default delay.
+	 * Dereferences deviceAttachmentSpec, so the spec must be non-null.
+	 * NOTE(review): shouldContinue is written here without touching
+	 * shouldContinueLock; confirm that is intended.
+	 */
+	virtual void start()
+	{
+		std::cout << __func__ << ": Starting stimulus producer for device "
+			<< deviceAttachmentSpec->deviceSelector << std::endl;
+
+		shouldContinue = true;
+		nDeferrals = 0;
+		scheduleNextTimeout();
+	}
+
+	virtual void stop();
+
+	// Releases frameAssemblyRateLimiter; presumably this lets the next frame
+	// assembly proceed. Verify against the code that acquires it.
+	void allowNextStimulusFrame()
+		{ frameAssemblyRateLimiter.release(); }
+
+	virtual std::shared_ptr<StimulusBuffer> getAttachedStimulusBuffer(
+		const std::shared_ptr<device::DeviceAttachmentSpec>& spec) const;
+
+	virtual std::shared_ptr<StimulusBuffer> getOrCreateAttachedStimulusBuffer(
+		const std::shared_ptr<device::DeviceAttachmentSpec>
+			&deviceAttachmentSpec) = 0;
+
+	virtual void destroyAttachedStimulusBuffer(
+		const std::shared_ptr<StimulusBuffer>& buffer);
+
+	// Check if any attached buffer has the specified qualeIfaceApi
+	bool hasBufferWithQualeIfaceApi(const std::string& qualeIfaceApi) const;
+
+protected:
+	SpinLock frameAssemblyRateLimiter;
+
+	// Virtual functions for derived classes to override
+
+	// Delay in milliseconds; defaults to one frame period. Presumably used
+	// when stopping (stop() is defined elsewhere).
+	virtual int getStopDelayMs() const
+	{
+		return CONFIG_STIMBUFF_FRAME_PERIOD_MS;
+	}
+
+	// Per-timeslice production hook implemented by derived classes.
+	virtual void stimFrameProductionTimesliceInd() = 0;
+
+private:
+	// Timer completion handler; defined out of line.
+	void onTimeout(const boost::system::error_code& error);
+
+public:
+	std::shared_ptr<device::DeviceAttachmentSpec> deviceAttachmentSpec;
+	std::vector<std::shared_ptr<StimulusBuffer>> attachedStimulusBuffers;
+
+private:
+	// Must stay declared before `timer`, which is constructed from it.
+	boost::asio::io_service& ioService;
+protected:
+	SpinLock shouldContinueLock;
+	bool shouldContinue;
+private:
+	boost::asio::deadline_timer timer;
+	size_t nDeferrals;
+	std::chrono::high_resolution_clock::time_point deferralStartTime;
+
+	// Arms the timer; the default delay is one frame period.
+	void scheduleNextTimeout(int delayMs = CONFIG_STIMBUFF_FRAME_PERIOD_MS);
+};
+
+} // namespace stim_buff
+} // namespace smo
+
+#endif // _STIMULUS_PRODUCER_H
@@ -1,26 +1,28 @@
 #include <iostream>
+#include <pthread.h>
 #include <componentThread.h>
 #include <marionette/marionette.h>


 int main(int argc, char *argv[], char *envp[])
 {
+	pthread_setname_np(pthread_self(), "smo:CRT:main");
 	/* We don't do anything inside of main()
 	 * Main merely waits for the marionette thread to exit.
 	 */
 	std::cout << "CRT:" << __func__ << ": about to JOLT Mrntt with cmdline args"
 		<< '\n';
-	smo::mrntt::mrntt->getIoService().post(
+	smo::mrntt::thread->getIoService().post(
 		[argc, argv, envp]()
 		{
 			std::cout << "Mrntt:" << __func__ << ":JOLTED: setting cmdline args"
 				<< '\n';
 			smo::CrtCommandLineArgs::set(argc, argv, envp);
-			smo::mrntt::mrntt->getIoService().stop();
+			smo::mrntt::thread->getIoService().stop();
 		}
 	);

-	smo::mrntt::mrntt->thread.join();
+	smo::mrntt::thread->thread.join();
 	std::cout << "CRT:" << __func__ << ": Mrntt exited with code '"
 		<< smo::mrntt::exitCode << "'\n";
 	return smo::mrntt::exitCode;
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Simple script to show just the C/C++ line count summary
+
+# Change to project root directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+# Count lines
+CPP_LINES=$(find . -name "*.cpp" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles | xargs wc -l 2>/dev/null | tail -1 | awk '{print $1}' || echo "0")
+H_LINES=$(find . -name "*.h" -o -name "*.hpp" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles | xargs wc -l 2>/dev/null | tail -1 | awk '{print $1}' || echo "0")
+C_LINES=$(find . -name "*.c" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles | xargs wc -l 2>/dev/null | tail -1 | awk '{print $1}' || echo "0")
+
+TOTAL=$((CPP_LINES + H_LINES + C_LINES))
+
+echo "Salmanoff Project C/C++ Lines: $TOTAL"
+echo "  C++ Source: $CPP_LINES"
+echo "  Headers:    $H_LINES"
+echo "  C Source:   $C_LINES"
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# Script to count C/C++ lines of code in the salmanoff project
+# Excludes third-party dependencies, build artifacts, and generated files
+
+# set -e  # Commented out to prevent early exit on empty file lists
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}=== Salmanoff Project C/C++ Line Count ===${NC}"
+echo
+
+# Change to project root directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$PROJECT_ROOT"
+
+echo -e "${YELLOW}Project root: $PROJECT_ROOT${NC}"
+echo
+
+# Find all C/C++ files, excluding third-party and build artifacts
+CPP_FILES=$(find . -name "*.cpp" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles)
+H_FILES=$(find . -name "*.h" -o -name "*.hpp" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles)
+C_FILES=$(find . -name "*.c" | grep -v third_party | grep -v build | grep -v b/ | grep -v "Livox-sdk-git" | grep -v CMakeFiles)
+
+# Count lines for each file type
+echo -e "${BLUE}=== C++ Source Files (.cpp) ===${NC}"
+CPP_LINES=0
+if [ -n "$CPP_FILES" ]; then
+    echo "$CPP_FILES" | xargs wc -l
+    CPP_LINES=$(echo "$CPP_FILES" | xargs wc -l | tail -1 | awk '{print $1}')
+else
+    echo "No C++ source files found"
+fi
+echo
+
+echo -e "${BLUE}=== Header Files (.h, .hpp) ===${NC}"
+H_LINES=0
+if [ -n "$H_FILES" ]; then
+    echo "$H_FILES" | xargs wc -l
+    H_LINES=$(echo "$H_FILES" | xargs wc -l | tail -1 | awk '{print $1}')
+else
+    echo "No header files found"
+fi
+echo
+
+echo -e "${BLUE}=== C Source Files (.c) ===${NC}"
+C_LINES=0
+if [ -n "$C_FILES" ]; then
+    echo "$C_FILES" | xargs wc -l
+    C_LINES=$(echo "$C_FILES" | xargs wc -l | tail -1 | awk '{print $1}')
+else
+    echo "No C source files found"
+fi
+echo
+
+# Calculate total
+TOTAL_LINES=$((CPP_LINES + H_LINES + C_LINES))
+
+echo -e "${GREEN}=== SUMMARY ===${NC}"
+echo -e "C++ Source Files (.cpp): ${YELLOW}$CPP_LINES${NC} lines"
+echo -e "Header Files (.h/.hpp):  ${YELLOW}$H_LINES${NC} lines"
+echo -e "C Source Files (.c):     ${YELLOW}$C_LINES${NC} lines"
+echo -e "${GREEN}─────────────────────────${NC}"
+echo -e "${GREEN}Total C/C++ Lines:       ${YELLOW}$TOTAL_LINES${NC} lines${NC}"
+echo
+
+# Show what's excluded
+echo -e "${BLUE}=== Excluded from count: ===${NC}"
+echo "• third_party/ directory (googletest, etc.)"
+echo "• build*/ directories (build artifacts)"
+echo "• b/ directories (build artifacts)"
+echo "• Livox-sdk-git/ directory (third-party SDK)"
+echo "• CMakeFiles/ directories (generated files)"
+echo "• Generated files (*.d, *.o, etc.)"
+echo
+
+# Optional: Show file count breakdown by directory
+echo -e "${BLUE}=== Files by directory: ===${NC}"
+echo "C++ Source Files:"
+echo "$CPP_FILES" | sed 's|^\./||' | cut -d'/' -f1 | sort | uniq -c | sort -nr | head -10
+
+echo
+echo "Header Files:"
+echo "$H_FILES" | sed 's|^\./||' | cut -d'/' -f1 | sort | uniq -c | sort -nr | head -10
@@ -0,0 +1,171 @@
+==========================================
+Iteration 67 - Thu Oct 30 08:41:13 PM AST 2025
+==========================================
+
+GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git
+Copyright (C) 2024 Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+Type "show copying" and "show warranty" for details.
+This GDB was configured as "x86_64-linux-gnu".
+Type "show configuration" for configuration details.
+For bug reporting instructions, please see:
+<https://www.gnu.org/software/gdb/bugs/>.
+Find the GDB manual and other documentation resources online at:
+    <http://www.gnu.org/software/gdb/documentation/>.
+
+For help, type "help".
+Type "apropos word" to search for commands related to "word"...
+Reading symbols from salmanoff...
+SIGINT is used by the debugger.
+Are you sure you want to change it? (y or n) [answered Y; input not from terminal]
+Waiting 2281ms before sending SIGINT...
+Starting program...
+
+This GDB supports auto-downloading debuginfo from the following URLs:
+  <https://debuginfod.ubuntu.com>
+Enable debuginfod for this session? (y or [n]) [answered N; input not from terminal]
+Debuginfod has been disabled.
+To make this setting permanent, add 'set debuginfod enabled off' to .gdbinit.
+[Thread debugging using libthread_db enabled]
+Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
+[New Thread 0x7ffff77ff6c0 (LWP 805031)]
+CRT:main: about to JOLT Mrntt with cmdline args
+main: Waiting for command line JOLT
+Mrntt:operator():JOLTED: setting cmdline args
+main: salmanoff 0.01.000
+main: DAP Specs: 
+DAP Spec Files: devices/bodies/dell-laptop.daps 
+Stim Buff API Library Paths: commonLibs/livoxProto1/ commonLibs/xcbXorg/ stimBuffApis/xcbWindow/ stimBuffApis/livoxGen1/ 
+Stim Buff API Libraries: libxcbWindow.so liblivoxGen1.so 
+
+initializeSalmanoff: Entered.
+
+main: Entering event loop
+[New Thread 0x7ffff6ffe6c0 (LWP 805032)]
+[New Thread 0x7ffff67fd6c0 (LWP 805033)]
+[New Thread 0x7ffff5ffc6c0 (LWP 805034)]
+[New Thread 0x7ffff57fb6c0 (LWP 805035)]
+[New Thread 0x7ffff4ffa6c0 (LWP 805036)]
+distributeAndPinThreadsAcrossCpus: Distributed 5 threads across 4 CPUs
+joltThreadReq1_posted: Thread 'director': handling JOLT request.
+joltThreadReq1_posted: Thread 'simulator': handling JOLT request.
+joltThreadReq1_posted: Thread 'subconscious': handling JOLT request.
+joltThreadReq1_posted: Thread 'body': handling JOLT request.
+body:main: Entering event loop
+simulator:main: Entering event loop
+subconscious:main: Entering event loop
+joltThreadReq1_posted: Thread 'world': handling JOLT request.
+world:main: Entering event loop
+Mrntt: All mind threads JOLTed.
+director:main: Entering event loop
+startThreadReq1_posted: Thread 'director': handling startThread.
+startThreadReq1_posted: Thread 'body': handling startThread.
+startThreadReq1_posted: Thread 'simulator': handling startThread.
+startThreadReq1_posted: Thread 'world': handling startThread.
+startThreadReq1_posted: Thread 'subconscious': handling startThread.
+Mrntt: All mind threads started.
+Library Path: libxcbWindow.so
+Stim Buff API Descriptor: Name: xcb
+Exported QualeIface APIs:
+  - visual-qualeiface
+
+
+Library Path: liblivoxGen1.so
+Stim Buff API Descriptor: Name: livoxGen1
+Exported QualeIface APIs:
+  - pcloud
+  - pcloudIntensity
+  - gyro
+  - accel
+
+
+
+start: BroadcastListener started on port 55000
+start: UDP Command Demuxer started on port 56001
+attachStimBuffDeviceReq1_posted: Attaching edev win0 to world thread
+xcbWindow_attachDeviceReq: Attached X11 window:
+  Display: 1, Screen: 0, MatchType: substring, Target: "mut", Found: "mutter guard window" (matched substring 'mut')
+attachStimBuffDeviceReq1_posted: Attaching edev avia0 to world thread
+getOrCreateDeviceReq1: Connection failed for device 3JEDK380010Z39
+attachDeviceReq1: Failed to create/find Livox device: 3JEDK380010Z39
+newDeviceAttachmentSpecInd2: Attach failed for device spec Device Identifier: avia0, Sensor Type: e, QualeIface API: structural-qualeiface, StimBuff API: livoxGen1, StimBuff API Params: (), Provider: livoxProto1, Provider Params: (), Device Selector: 3JEDK380010Z39
+
+attachAllUnattachedDevicesFromReq2: Failed to attach device: avia0
+Mrntt: attached 1 of 2 sense devices.
+Mrntt: Body component initialized.
+negtrinEventInd: Handling negtrin event.
+marionetteInitializeReqCb: Marionette initialized.
+broadcastMsgInd: Discovered new Livox device: DiscoveredDevice{identifier='3JEDK380010Z391', ipAddr='10.42.0.139', deviceType=7 (Avia)}
+attachStimBuffDeviceReq1_posted: Attaching edev avia0 to world thread
+attachDeviceReq1: Successfully attached/found Livox device: 3JEDK380010Z39 (ID: avia0)
+Sending SIGINT to program (PID: 805028)...
+SIGINT (Ctrl+C) received. Initiating shutdown...
+Mrntt: About to detach all sense devices.
+xcbWindow_detachDeviceReq: Detached X11 window device:
+Device Identifier: win0, Sensor Type: e, QualeIface API: visual-qualeiface, StimBuff API: xcb, StimBuff API Params: (dev-substring ), Provider: xorg, Provider Params: (display=1 screen=0 ), Device Selector: mut
+
+Mrntt: Successfully detached 1 of 1 sense devices.
+Mrntt: About to finalize all stim buff api libs.
+stop: UDP Command Demuxer stopped
+stop: BroadcastListener stopped
+broadcastMsgInd: Error receiving broadcast message: Operation canceled
+Mrntt: About to unload all stim buff api libs.
+
+Thread 7 "salmanoff" received signal SIGSEGV, Segmentation fault.
+[Switching to Thread 0x7ffff4ffa6c0 (LWP 805036)]
+0x0000000000000000 in ?? ()
+
+=== SEGFAULT DETECTED ===
+#0  0x0000000000000000 in ?? ()
+#1  0x00007ffff7ace057 in smo::stim_buff::AttachDeviceReq::attachDeviceReq2 (this=0x7ffff00098a0, 
+    context=std::shared_ptr<smo::stim_buff::AttachDeviceReq> (use count 4, weak count 1) = {...}, error=...) at /home/latentprion/gits/salmanoff-git/stimBuffApis/livoxGen1/livoxGen1.cpp:160
+#2  0x00007ffff7ae6584 in std::__invoke_impl<void, void (smo::stim_buff::AttachDeviceReq::*&)(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&), smo::stim_buff::AttachDeviceReq*&, std::shared_ptr<smo::stim_buff::AttachDeviceReq>&, boost::system::error_code const&> (
+    __f=@0x7ffff4ff9a80: (void (smo::stim_buff::AttachDeviceReq::*)(smo::stim_buff::AttachDeviceReq * const, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, const boost::system::error_code &)) 0x7ffff7acde68 <smo::stim_buff::AttachDeviceReq::attachDeviceReq2(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, 
+    __t=@0x7ffff4ff9aa0: 0x7ffff00098a0) at /usr/include/c++/13/bits/invoke.h:74
+#3  0x00007ffff7ae42b1 in std::__invoke<void (smo::stim_buff::AttachDeviceReq::*&)(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&), smo::stim_buff::AttachDeviceReq*&, std::shared_ptr<smo::stim_buff::AttachDeviceReq>&, boost::system::error_code const&> (
+    __fn=@0x7ffff4ff9a80: (void (smo::stim_buff::AttachDeviceReq::*)(smo::stim_buff::AttachDeviceReq * const, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, const boost::system::error_code &)) 0x7ffff7acde68 <smo::stim_buff::AttachDeviceReq::attachDeviceReq2(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>)
+    at /usr/include/c++/13/bits/invoke.h:96
+#4  0x00007ffff7ae1fe1 in std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>::__call<void, boost::system::error_code const&, 0ul, 1ul, 2ul>(std::tuple<boost::system::error_code const&>&&, std::_Index_tuple<0ul, 1ul, 2ul>) (this=0x7ffff4ff9a80, __args=...) at /usr/include/c++/13/functional:506
+#5  0x00007ffff7ade79a in std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>::operator()<boost::system::error_code const&, void>(boost::system::error_code const&) (this=0x7ffff4ff9a80)
+    at /usr/include/c++/13/functional:591
+#6  0x00007ffff7aec999 in boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code>::operator()() (this=0x7ffff4ff9a80)
+    at /usr/include/boost/asio/detail/bind_handler.hpp:171
+#7  0x00007ffff7aebd0e in boost::asio::asio_handler_invoke<boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code> >(boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code>&, ...) (function=...) at /usr/include/boost/asio/handler_invoke_hook.hpp:88
+#8  0x00007ffff7aea450 in boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code>, std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)> >(boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code>&, std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>&) (function=..., context=...) at /usr/include/boost/asio/detail/handler_invoke_helpers.hpp:54
+#9  0x00007ffff7ae8790 in boost::asio::detail::handler_work<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::asio::any_io_executor, void>::complete<boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code> >(boost::asio::detail::binder1<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::system::error_code>&, std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>&) (this=0x7ffff4ff9a40, function=..., handler=...) at /usr/include/boost/asio/detail/handler_work.hpp:524
+#10 0x00007ffff7ae6986 in boost::asio::detail::wait_handler<std::_Bind<void (smo::stim_buff::AttachDeviceReq::*(smo::stim_buff::AttachDeviceReq*, std::shared_ptr<smo::stim_buff::AttachDeviceReq>, std::_Placeholder<1>))(std::shared_ptr<smo::stim_buff::AttachDeviceReq>, boost::system::error_code const&)>, boost::asio::any_io_executor>::do_complete(void*, boost::asio::detail::scheduler_operation*, boost::system::error_code const&, unsigned long) (owner=0x7ffff0007970, base=0x7fffe400a180) at /usr/include/boost/asio/detail/wait_handler.hpp:76
+#11 0x000055555556d35e in boost::asio::detail::scheduler_operation::complete (this=0x7fffe400a180, owner=0x7ffff0007970, ec=..., bytes_transferred=0)
+    at /usr/include/boost/asio/detail/scheduler_operation.hpp:40
+#12 0x00005555555706e7 in boost::asio::detail::scheduler::do_run_one (this=0x7ffff0007970, lock=..., this_thread=..., ec=...) at /usr/include/boost/asio/detail/impl/scheduler.ipp:493
+#13 0x00005555555700b9 in boost::asio::detail::scheduler::run (this=0x7ffff0007970, ec=...) at /usr/include/boost/asio/detail/impl/scheduler.ipp:210
+#14 0x0000555555570a9d in boost::asio::io_context::run (this=0x7ffff0007900) at /usr/include/boost/asio/impl/io_context.ipp:64
+#15 0x00005555555f6b10 in smo::MindThread::main (self=...) at /home/latentprion/gits/salmanoff-git/smocore/componentThread.cpp:82
+#16 0x00005555555f4ed3 in std::__invoke_impl<void, void (*)(smo::MindThread&), std::reference_wrapper<smo::MindThread> > (
+    __f=@0x7ffff0007bf0: 0x5555555f6984 <smo::MindThread::main(smo::MindThread&)>) at /usr/include/c++/13/bits/invoke.h:61
+#17 0x00005555555f4e41 in std::__invoke<void (*)(smo::MindThread&), std::reference_wrapper<smo::MindThread> > (
+    __fn=@0x7ffff0007bf0: 0x5555555f6984 <smo::MindThread::main(smo::MindThread&)>) at /usr/include/c++/13/bits/invoke.h:96
+#18 0x00005555555f4d2f in std::thread::_Invoker<std::tuple<void (*)(smo::MindThread&), std::reference_wrapper<smo::MindThread> > >::_M_invoke<0ul, 1ul> (this=0x7ffff0007be8)
+    at /usr/include/c++/13/bits/std_thread.h:292
+#19 0x00005555555f4c88 in std::thread::_Invoker<std::tuple<void (*)(smo::MindThread&), std::reference_wrapper<smo::MindThread> > >::operator() (this=0x7ffff0007be8)
+    at /usr/include/c++/13/bits/std_thread.h:299
+#20 0x00005555555f4bdc in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (*)(smo::MindThread&), std::reference_wrapper<smo::MindThread> > > >::_M_run (this=0x7ffff0007be0)
+    at /usr/include/c++/13/bits/std_thread.h:244
+#21 0x00007ffff7cecdb4 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
+#22 0x00007ffff789caa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
+#23 0x00007ffff7929c6c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
+
+=== GDB is now interactive - you can inspect the state ===
+[Thread 0x7ffff4ffa6c0 (LWP 805036) exited]
+[Thread 0x7ffff57fb6c0 (LWP 805035) exited]
+[Thread 0x7ffff5ffc6c0 (LWP 805034) exited]
+[Thread 0x7ffff67fd6c0 (LWP 805033) exited]
+[Thread 0x7ffff6ffe6c0 (LWP 805032) exited]
+[Thread 0x7ffff7f5f780 (LWP 805028) exited]
+[Thread 0x7ffff77ff6c0 (LWP 805031) exited]
+[New process 805028]
+
+Program terminated with signal SIGSEGV, Segmentation fault.
+The program no longer exists.
+(gdb) 
@@ -0,0 +1,123 @@
+# GDB command file for reproducing UdpCommandDemuxer heisenbug
+# This script runs salmanoff, waits a random time, sends SIGINT, and catches segfaults
+
+# Disable the pager so long output (e.g. backtraces) never blocks waiting
+# for a keypress while this script runs unattended.
+set pagination off
+
+# SIGSEGV: stop the inferior and print a notice, so the stop-event hook
+# defined below gets a chance to dump a backtrace.
+handle SIGSEGV stop print
+# SIGINT: deliver it straight to the program without stopping or printing.
+#   nostop  - do not stop execution when the signal arrives
+#   noprint - do not announce the signal
+#   pass    - let the program see the signal
+# GDB asks for confirmation before changing SIGINT handling ("SIGINT is used
+# by the debugger"); when input is not a terminal the prompt is auto-answered.
+handle SIGINT nostop noprint pass
+
+# Use Python to set up automatic handling of stop events and SIGINT injection
+python
+import time
+import random
+import threading
+import os
+import signal
+
+sigint_thread_started = False
+
+def send_sigint_after_delay():
+    # Wait random milliseconds between 2000-3000
+    delay_ms = random.randint(2000, 3000)
+    print(f"Waiting {delay_ms}ms before sending SIGINT...")
+    time.sleep(delay_ms / 1000.0)
+    
+    # Send SIGINT directly to the process using its PID
+    # This works even when the program is running (unlike gdb.execute("signal SIGINT"))
+    try:
+        inferior = gdb.selected_inferior()
+        if inferior and inferior.is_valid():
+            pid = inferior.pid
+            print(f"Sending SIGINT to program (PID: {pid})...")
+            os.kill(pid, signal.SIGINT)
+        else:
+            print("Program is not running - cannot send SIGINT")
+    except Exception as e:
+        print(f"Failed to send SIGINT: {e}")
+
+def start_sigint_thread():
+    global sigint_thread_started
+    if not sigint_thread_started:
+        sigint_thread_started = True
+        thread = threading.Thread(target=send_sigint_after_delay, daemon=True)
+        thread.start()
+
+# Hook to check stop reason and handle segfaults
+def stop_handler(event):
+    if isinstance(event, gdb.SignalEvent):
+        if event.stop_signal == "SIGSEGV":
+            # Segfault detected
+            print("\n=== SEGFAULT DETECTED ===")
+            gdb.execute("bt")
+            print("\n=== GDB is now interactive - you can inspect the state ===")
+            # Don't quit - stay in interactive mode
+        elif event.stop_signal == "SIGINT":
+            # SIGINT received - with "nostop pass", SIGINT should pass through automatically
+            # But if we get here (shouldn't happen with nostop), just let it pass
+            pass
+    elif isinstance(event, gdb.ExitedEvent):
+        # Program exited normally
+        if event.exit_code == 0:
+            print("\nProgram exited normally. Continuing loop...")
+            gdb.post_event(lambda: gdb.execute("quit", False))
+        else:
+            print(f"\nProgram exited with code {event.exit_code}")
+            gdb.post_event(lambda: gdb.execute("quit", False))
+
+# Hook for gdb.events.cont: fired when the program continues/starts running.
+# The event argument is required by the callback signature but is not used.
+def cont_handler(event):
+    # Make sure the delayed-SIGINT thread exists; start_sigint_thread() guards
+    # itself with sigint_thread_started, so repeated continue events are harmless
+    start_sigint_thread()
+
+# Register event handlers: stop_handler runs on stop events, cont_handler on
+# continue events
+gdb.events.stop.connect(stop_handler)
+gdb.events.cont.connect(cont_handler)
+
+# Start SIGINT thread before running - it will wait and then send SIGINT.
+# Because the thread starts before the `run` command, the random delay is
+# measured from this point and therefore includes program start-up time.
+# The thread will send SIGINT even if program is stopped (signal will be delivered on continue)
+start_sigint_thread()
+end
+
+# Start the program
+echo Starting program...\n
+run
+
+# `run` returns once the program has either stopped or exited.
+#   - exited (no threads / invalid inferior): quit GDB so the driver loop can
+#     start the next iteration.
+#   - stopped on SIGSEGV: do NOT resume. Continuing would deliver the signal
+#     and terminate the program, destroying the crash state we want to inspect.
+#     Falling off the end of this script leaves GDB at its interactive prompt.
+#   - stopped for any other reason: resume execution.
+python
+import signal
+
+def _stopped_on_sigsegv():
+    """Return True when the selected thread's last stop was caused by SIGSEGV.
+
+    Reads the $_siginfo convenience variable; if it cannot be read (the stop
+    was not caused by a signal, or the target does not support it) the stop
+    is treated as a non-segfault.
+    """
+    try:
+        signo = int(gdb.parse_and_eval("$_siginfo.si_signo"))
+    except Exception:
+        return False
+    return signo == int(signal.SIGSEGV)
+
+try:
+    inferior = gdb.selected_inferior()
+    if inferior and inferior.is_valid():
+        # Check if there are threads (indicates program has not exited)
+        try:
+            threads = inferior.threads()
+            if threads and _stopped_on_sigsegv():
+                # Keep the crashed program stopped for interactive inspection
+                print("\nProgram is stopped on SIGSEGV - not resuming.")
+            elif threads:
+                # Program has threads - continue execution
+                # SIGINT thread is already running and will send signal when ready
+                gdb.execute("continue", False)
+            else:
+                # No threads - program has exited
+                print("\nProgram has exited (no threads found).")
+                gdb.execute("quit", False)
+        except Exception as e:
+            # If we can't check threads, assume program exited
+            print(f"\nError checking threads: {e}")
+            print("Assuming program exited.")
+            gdb.execute("quit", False)
+    else:
+        # Inferior is not valid - program has exited
+        print("\nProgram has exited (inferior not valid).")
+        gdb.execute("quit", False)
+except Exception as e:
+    print(f"Error checking program state: {e}")
+    # If we can't determine state, try to quit (best effort)
+    try:
+        gdb.execute("quit", False)
+    except Exception as quit_error:
+        print(f"Failed to quit GDB: {quit_error}")
+end
+
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Script to reproduce UdpCommandDemuxer race condition heisenbug
+# Runs salmanoff in GDB repeatedly, injecting SIGINT at random intervals
+#
+# Usage: ./reproduce_heisenbug.sh [WORKING_DIR]
+#   WORKING_DIR: Working directory where salmanoff binary and all paths are relative to
+#                If not provided, uses WORKING_DIR environment variable, or defaults to project root
+#
+# Environment variables:
+#   WORKING_DIR: Working directory (can be overridden by command-line argument)
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+# Determine working directory (command-line arg > env var > default to project root)
+REQUESTED_DIR="${1:-${WORKING_DIR:-$PROJECT_ROOT}}"
+
+# Check that the directory exists BEFORE canonicalizing it: a failed
+# "cd ... && pwd" yields an empty string, which would hide the offending
+# path from the error message.
+if [ ! -d "$REQUESTED_DIR" ]; then
+    echo "Error: Working directory does not exist: $REQUESTED_DIR" >&2
+    exit 1
+fi
+
+# Convert to absolute path (can still fail, e.g. on missing permissions)
+if ! WORKING_DIR="$(cd "$REQUESTED_DIR" && pwd)"; then
+    echo "Error: Cannot enter working directory: $REQUESTED_DIR" >&2
+    exit 1
+fi
+
+# Paths - all relative to working directory
+SALMANOFF_BINARY="$WORKING_DIR/salmanoff"
+GDB_SCRIPT="$SCRIPT_DIR/gdb_heisenbug.gdb"
+
+# Check if binary exists
+if [ ! -f "$SALMANOFF_BINARY" ]; then
+    echo "Error: salmanoff binary not found at $SALMANOFF_BINARY" >&2
+    echo "Working directory: $WORKING_DIR" >&2
+    exit 1
+fi
+
+# Check if GDB script exists
+if [ ! -f "$GDB_SCRIPT" ]; then
+    echo "Error: GDB script not found at $GDB_SCRIPT" >&2
+    exit 1
+fi
+
+# Command line arguments for salmanoff
+SALMANOFF_ARGS=(
+    -p commonLibs/livoxProto1/
+    -p commonLibs/xcbXorg/
+    -p stimBuffApis/xcbWindow/
+    -p stimBuffApis/livoxGen1/
+    -a libxcbWindow.so
+    -a liblivoxGen1.so
+    -d devices/bodies/dell-laptop.daps
+)
+
+echo "=== UdpCommandDemuxer Heisenbug Reproduction Script ==="
+echo "Working Directory: $WORKING_DIR"
+echo "Binary: $SALMANOFF_BINARY"
+echo "GDB Script: $GDB_SCRIPT"
+echo "Arguments: ${SALMANOFF_ARGS[*]}"
+echo ""
+echo "Press Ctrl+C to stop the loop"
+echo ""
+
+# Change to working directory so all relative paths are resolved correctly
+cd "$WORKING_DIR"
+
+# Loop counter
+ITERATION=0
+
+# Main loop
+while true; do
+    ITERATION=$((ITERATION + 1))
+    echo "=========================================="
+    echo "Iteration $ITERATION - $(date)"
+    echo "=========================================="
+    echo ""
+
+    # Run GDB with the command file
+    # GDB will stay interactive on segfault, exit on normal completion
+    # When GDB stays interactive (on segfault), this will wait for user to quit GDB
+    # When GDB exits normally (program completed), exit code will be 0 and loop continues
+    # Note: We use a relative path to salmanoff binary since we're already in WORKING_DIR
+    SALMANOFF_RELATIVE="salmanoff"
+    if gdb -x "$GDB_SCRIPT" --args "$SALMANOFF_RELATIVE" "${SALMANOFF_ARGS[@]}"; then
+        # GDB exited successfully (program completed normally)
+        EXIT_CODE=0
+    else
+        # GDB exited with error (unexpected exit or user interrupted)
+        EXIT_CODE=$?
+        echo ""
+        echo "GDB exited with code $EXIT_CODE"
+        if [ $EXIT_CODE -ne 0 ] && [ $EXIT_CODE -ne 130 ]; then
+            # Exit code 130 is SIGINT (user pressed Ctrl+C), which is expected
+            echo "Unexpected GDB exit - check output above"
+        fi
+    fi
+
+    echo ""
+    echo "Iteration $ITERATION complete. Starting next iteration in 1 second..."
+    sleep 1
+    echo ""
+done
+
+echo ""
+echo "Loop terminated."
+
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`+edev\|win0\|visual-qualeiface()\|xcb(dev-substring)\|xorg(display=1\|screen=0)\|mut`