From e8044a0d17b998d4b7ef77166d8c6b5f2800130e Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Sat, 4 Apr 2026 13:17:43 -0400 Subject: [PATCH] OClCollMeshEngn: produce ambience into stimBuff frames directly --- docs/design/stencils.md | 12 +- docs/livox-gen1-lidar-dap-spec.md | 8 +- include/user/pcloudAmbienceStencil.h | 28 --- stimBuffApis/livoxGen1/collateDgrams.cl | 6 +- .../livoxGen1/lg1PcloudAmbienceStencil.h | 63 ----- .../openClCollatingAndMeshingEngine.cpp | 231 +++++------------- .../openClCollatingAndMeshingEngine.h | 12 - .../livoxGen1/pcloudAmbienceStimulusBuffer.h | 14 +- .../livoxGen1/pcloudStimulusProducer.cpp | 57 ++--- .../livoxGen1/pcloudStimulusProducer.h | 3 - 10 files changed, 101 insertions(+), 333 deletions(-) delete mode 100644 include/user/pcloudAmbienceStencil.h delete mode 100644 stimBuffApis/livoxGen1/lg1PcloudAmbienceStencil.h diff --git a/docs/design/stencils.md b/docs/design/stencils.md index 92694e8..3007cf6 100644 --- a/docs/design/stencils.md +++ b/docs/design/stencils.md @@ -77,14 +77,18 @@ raising new intrins. - Stimbuffs must respect this limit and wait for stencil returns before allocating new ones -**Example:** +**Example (generic device):** +``` ++idev|my-device|someQualeApi(n-stencils=4)|someStimBuffApi()|livoxProto1()|SERIAL +``` + +The `pcloudAmbience` Livox Gen1 path does **not** use the `n-stencils` parameter; ambience data is delivered as a dense float vector in the stimulus frame buffer, not via a separate stencil allocation list. + +**Deprecated example (do not use for Livox Gen1 ambience):** ``` +idev|my-device|pcloudAmbience(n-stencils=4)|livoxGen1-pcloud()|livoxProto1()|3JEDK380010Z39 ``` -This example allows the stimbuff to allocate up to 4 stencils -simultaneously. - ## Notes The stencil registration mechanism discussed above is not currently diff --git a/docs/livox-gen1-lidar-dap-spec.md b/docs/livox-gen1-lidar-dap-spec.md index 9f4e48d..3f7e81c 100644 --- a/docs/livox-gen1-lidar-dap-spec.md +++ b/docs/livox-gen1-lidar-dap-spec.md @@ -33,7 +33,7 @@ Each stim-buff-api is designed to work with specific stim-iface libraries that u ### 2. Point Cloud Ambience Data Device (Interoceptor) -**Purpose**: Provides ambience data from the LiDAR point cloud, computing average intensity per frame and counting frames that qualify for intrinsic stimuli. +**Purpose**: Provides ambience data from the LiDAR point cloud as a **vector of per-dagram average intensities** (one `float` per UDP datagram slot in the staging frame, length `n-dgrams-per-frame`). The OpenCL collate kernel writes these values directly into the acquired ambience `StimulusFrame` buffer. **Syntax**: ``` @@ -41,10 +41,10 @@ Each stim-buff-api is designed to work with specific stim-iface libraries that u ``` **Stim-Buff-API**: `livoxGen1-pcloud` -**Quale-Iface-API**: `pcloudAmbience` - Computes average intensity per frame and exports postrin/negtrin based on configurable thresholds +**Quale-Iface-API**: `pcloudAmbience` - Delivers per-dagram average intensity floats; postrin/negtrin binding and passband-style aggregation are being revised (see intrinsic parameters below). **Intrinsic Stimuli Support** (for pcloudAmbience quale-iface-api): -The `pcloudAmbience` quale-iface-api exports both a postrin and a negtrin whose +The `pcloudAmbience` quale-iface-api is intended to export both a postrin and a negtrin whose thresholds are configurable via standard quale-iface-api-params: - **Postrin interest threshold**: Configurable via `postrin-interest-[percentage|pc|threshold|thresh|thr]` - **Negtrin interest threshold**: Configurable via `negtrin-interest-[percentage|pc|threshold|thresh|thr]` @@ -165,7 +165,7 @@ The `livoxProto1` provider accepts the following parameters: | Stim Feature | Stim-Buff-API | Quale-Iface-API | Description | |--------------|---------------|----------------|-------------| | Point Cloud Intensity | `livoxGen1-pcloudIntensity` | `pcloudIntensity` | Light intensity/reflectivity data | -| Point Cloud Ambience | `livoxGen1-pcloud` | `pcloudAmbience` | High-intensity point count per slot | +| Point Cloud Ambience | `livoxGen1-pcloud` | `pcloudAmbience` | Per-dagram average intensity vector (`float` × `n-dgrams-per-frame`) | | Point Cloud Coordinates | `livoxGen1-pcloud` | `pcloud` | Spatial coordinate data | | Gyroscope | `livoxGen1-gyro` | `gyro` | Angular velocity measurements | | Accelerometer | `livoxGen1-accel` | `accel` | Linear acceleration measurements | diff --git a/include/user/pcloudAmbienceStencil.h b/include/user/pcloudAmbienceStencil.h deleted file mode 100644 index 480a8e7..0000000 --- a/include/user/pcloudAmbienceStencil.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _PCLOUD_AMBIENCE_STENCIL_H -#define _PCLOUD_AMBIENCE_STENCIL_H - -#include -#include -#include - -namespace smo { -namespace stim_buff { - -/** - * PcloudAmbienceStencil represents stencils for point cloud ambience data. - * This is a base class for device-specific implementations. - */ -class PcloudAmbienceStencil -: public smo::cologex::Stencil -{ -public: - typedef uint32_t PcloudAmbienceStimulusValue; - - PcloudAmbienceStencil() = default; - virtual ~PcloudAmbienceStencil() = default; -}; - -} // namespace stim_buff -} // namespace smo - -#endif // _PCLOUD_AMBIENCE_STENCIL_H diff --git a/stimBuffApis/livoxGen1/collateDgrams.cl b/stimBuffApis/livoxGen1/collateDgrams.cl index 0379a47..7ae1135 100644 --- a/stimBuffApis/livoxGen1/collateDgrams.cl +++ b/stimBuffApis/livoxGen1/collateDgrams.cl @@ -25,7 +25,7 @@ __kernel void collate( __global uchar* assembly, __global float* collation, __global float* intensityBuffer, - __global float* averageIntensityBuffer, + __global float* ambienceBuffer, uint slotStride, uint nPointsPerSlot, uint nDgramsPerFrame) @@ -368,9 +368,9 @@ __kernel void collate( // Unsupported data types are silently ignored // Write average intensity for this work item (once at the end) - if (averageIntensityBuffer != NULL) + if (ambienceBuffer != NULL) { - averageIntensityBuffer[slotIndex] = (validPointCount > 0) ? + ambienceBuffer[slotIndex] = (validPointCount > 0) ? (intensitySum / (float)validPointCount) : 0.0f; } } diff --git a/stimBuffApis/livoxGen1/lg1PcloudAmbienceStencil.h b/stimBuffApis/livoxGen1/lg1PcloudAmbienceStencil.h deleted file mode 100644 index 22d902d..0000000 --- a/stimBuffApis/livoxGen1/lg1PcloudAmbienceStencil.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef _LG1_PCLOUD_AMBIENCE_STENCIL_H -#define _LG1_PCLOUD_AMBIENCE_STENCIL_H - -#include "livoxGen1.h" -#include -#include - -namespace smo { -namespace stim_buff { - -/** - * LG1PcloudAmbienceStencil represents Livox Gen1-specific stencils for - * ambience data. It holds a single RangeDescriptor with stimulusBufferSpot=0 - * and nContiguousSpots=1. - */ -class LG1PcloudAmbienceStencil -: public PcloudAmbienceStencil -{ -public: - explicit LG1PcloudAmbienceStencil() - : PcloudAmbienceStencil(), - rangeDescriptor{0, 1} - {} - - ~LG1PcloudAmbienceStencil() = default; - - // Implement pure virtual functions from Stencil - bool hasData() const override - { - return true; - } - - size_t getRelevantCount() const override - { - return rangeDescriptor.nContiguousSpots; - } - - bool isRelevant(size_t offset) const override - { - return (offset >= rangeDescriptor.stimulusBufferSpot && - offset < (rangeDescriptor.stimulusBufferSpot - + rangeDescriptor.nContiguousSpots)); - } - - size_t getNRangeDescriptors() const override - { - return 1; - } - - bool buildStencilMetadata() override - { - // Metadata is already built (single fixed descriptor) - return true; - } - -public: - smo::cologex::Stencil::RangeDescriptor rangeDescriptor; -}; - -} // namespace stim_buff -} // namespace smo - -#endif // _LG1_PCLOUD_AMBIENCE_STENCIL_H diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 6c1ccb5..c4225a6 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -35,10 +35,8 @@ OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine( computeDevice(nullptr), clAssemblyBufferClBuffer(nullptr), clCollationBufferClBuffer(nullptr), -clAverageIntensityBufferClBuffer(nullptr), clAssemblyBuffer(nullptr), clCollationBuffer(nullptr), -clAverageIntensityBuffer(nullptr), shouldAcceptRequests(false), compactIsRunning(false), collateIsRunning(false), @@ -47,11 +45,8 @@ assemblyBufferPtr(nullptr), assemblyBufferSize(0), collationBufferPtr(nullptr), collationBufferSize(0), -averageIntensityBufferPtr(nullptr), -averageIntensityBufferSize(0), mappedAssemblyBuffer(nullptr), mappedCollationBuffer(nullptr), -mappedAverageIntensityBuffer(nullptr), frameAssemblyDesc(nullptr) { } @@ -90,15 +85,11 @@ bool OpenClCollatingAndMeshingEngine::setup() // Get StagingBuffer memory pointers from parent struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec(); struct iovec collationIov = parent.collationBuffer.getClEngineIovec(); - struct iovec averageIntensityIov = parent.averageIntensityBuffer - .getClEngineIovec(); assemblyBufferPtr = assemblyIov.iov_base; assemblyBufferSize = assemblyIov.iov_len; collationBufferPtr = collationIov.iov_base; collationBufferSize = collationIov.iov_len; - averageIntensityBufferPtr = averageIntensityIov.iov_base; - averageIntensityBufferSize = averageIntensityIov.iov_len; // Get FrameAssemblyDesc from assembly buffer frameAssemblyDesc = static_cast>( @@ -140,28 +131,13 @@ bool OpenClCollatingAndMeshingEngine::setup() return false; } - auto wip_clAverageIntensityBufferClBuffer = smoHooksPtr - ->ComputeManager_createUseHostPtrBuffer( - averageIntensityBufferPtr, averageIntensityBufferSize, - CL_MEM_WRITE_ONLY); - - if (!wip_clAverageIntensityBufferClBuffer) - { - std::cerr << __func__ << ": failed to create average intensity buffer" - << std::endl; - return false; - } - // Cache cl_mem handles for the device we're using cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer ->getAssociatedBufferHandleForDevice(wip_computeDevice); cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer ->getAssociatedBufferHandleForDevice(wip_computeDevice); - cl_mem wip_clAverageIntensityBuffer = wip_clAverageIntensityBufferClBuffer - ->getAssociatedBufferHandleForDevice(wip_computeDevice); - if (!wip_clAssemblyBuffer || !wip_clCollationBuffer - || !wip_clAverageIntensityBuffer) + if (!wip_clAssemblyBuffer || !wip_clCollationBuffer) { std::cerr << __func__ << ": failed to get buffer handles for device" << std::endl; @@ -186,10 +162,8 @@ bool OpenClCollatingAndMeshingEngine::setup() computeDevice = wip_computeDevice; clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer; clCollationBufferClBuffer = wip_clCollationBufferClBuffer; - clAverageIntensityBufferClBuffer = wip_clAverageIntensityBufferClBuffer; clAssemblyBuffer = wip_clAssemblyBuffer; clCollationBuffer = wip_clCollationBuffer; - clAverageIntensityBuffer = wip_clAverageIntensityBuffer; slotCompactorProgram = std::move(wip_slotCompactorProgram); collateProgram = std::move(wip_collateProgram); slotCompactorKernel = std::move(wip_slotCompactorKernel); @@ -253,12 +227,6 @@ void OpenClCollatingAndMeshingEngine::finalize() // Release OpenCL buffers via smo hooks if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer) { - if (clAverageIntensityBufferClBuffer) - { - smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer( - clAverageIntensityBufferClBuffer); - clAverageIntensityBufferClBuffer.reset(); - } if (clCollationBufferClBuffer) { smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer( @@ -275,7 +243,6 @@ void OpenClCollatingAndMeshingEngine::finalize() // Reset cached cl_mem handles clCollationBuffer = nullptr; - clAverageIntensityBuffer = nullptr; clAssemblyBuffer = nullptr; // Release kernels and programs (handled automatically by unique_ptr destructors) @@ -301,8 +268,6 @@ void OpenClCollatingAndMeshingEngine::finalize() assemblyBufferSize = 0; collationBufferPtr = nullptr; collationBufferSize = 0; - averageIntensityBufferPtr = nullptr; - averageIntensityBufferSize = 0; frameAssemblyDesc = nullptr; } @@ -409,15 +374,11 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( auto validateBuffers = [this]() { struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec(); struct iovec collationIov = parent.collationBuffer.getClEngineIovec(); - struct iovec averageIntensityIov = parent.averageIntensityBuffer - .getClEngineIovec(); if (assemblyIov.iov_base != assemblyBufferPtr || assemblyIov.iov_len != assemblyBufferSize || collationIov.iov_base != collationBufferPtr - || collationIov.iov_len != collationBufferSize - || averageIntensityIov.iov_base != averageIntensityBufferPtr - || averageIntensityIov.iov_len != averageIntensityBufferSize) + || collationIov.iov_len != collationBufferSize) { throw std::runtime_error( std::string(__func__) + ": buffer mismatch - buffers have changed"); @@ -454,14 +415,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( } unmapCollationBuffer(); - if (!mapAverageIntensityBuffer(CL_MAP_WRITE)) - { - std::cerr << __func__ << ": failed to map average intensity buffer" - << std::endl; - return false; - } - - unmapAverageIntensityBuffer(); // Map/unmap intensity buffer if it exists if (intensityStimFrame.has_value()) @@ -486,6 +439,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( } } + // Map/unmap ambience stim frame buffer (collate writes per-slot averages here) + if (ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = ambienceStimFrame->get(); + cl_mem ambienceClBuffer = ambienceFrame.clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); + + if (ambienceClBuffer) + { + void* mappedAmbienceBuffer = nullptr; + if (!mapBuffer( + ambienceClBuffer, ambienceFrame.slotDesc.nBytes, + CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer)) + { + std::cerr << __func__ << ": failed to map ambience buffer" + << std::endl; + return false; + } + + unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer); + } + } + // Calculate global work size (just num slots in the frame) size_t globalWorkSize = static_cast(frameAssemblyDesc->numSlots); @@ -708,21 +684,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - // Set average intensity buffer argument (arg 3) - /** EXPLANATION: - * We only pass the average intensity buffer argument to the collate kernel - * when ambienceStimFrame is present. This is because the collate kernel - * only needs the average intensity buffer if ambience processing is - * requested (i.e., the ambience stimulus buffer is attached). If no - * ambienceStimFrame is supplied, we skip passing the buffer to avoid - * unnecessary work. - */ - cl_mem averageIntensityClBuffer = nullptr; - if (ambienceStimFrame.has_value()) { - averageIntensityClBuffer = clAverageIntensityBuffer; + // Set ambience buffer argument (arg 3): acquired PcloudAmbience StimulusFrame + cl_mem ambienceClBufferArg = nullptr; + if (ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = ambienceStimFrame->get(); + const size_t needBytes = static_cast(nDgramsPerFrame) + * sizeof(float); + + if (ambienceFrame.slotDesc.nBytes < needBytes) + { + std::cerr << __func__ << ": ambience stim frame slot too small: " + << ambienceFrame.slotDesc.nBytes << " < " << needBytes + << std::endl; + return false; + } + ambienceClBufferArg = ambienceFrame.clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); } err = clSetKernelArg( - collateKernel.get(), 3, sizeof(cl_mem), &averageIntensityClBuffer); + collateKernel.get(), 3, sizeof(cl_mem), &ambienceClBufferArg); if (err != CL_SUCCESS) { @@ -804,8 +785,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete( std::optional> ambienceStimFrame, bool isFinalizing) { - (void)ambienceStimFrame; - cl_map_flags mapFlags; /** EXPLANATION: * Technically we should only need to do this if we plan to read the @@ -818,10 +797,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete( unmapCollationBuffer(); } - if (mapAverageIntensityBuffer(mapFlags)) { - unmapAverageIntensityBuffer(); - } - // Map/unmap intensity buffer if it exists if (intensityStimFrame.has_value()) { @@ -841,6 +816,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete( } } + // Sync GPU writes into ambience stim frame host backing store + if (ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = ambienceStimFrame->get(); + cl_mem ambienceClBuffer = ambienceFrame.clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); + + if (ambienceClBuffer) + { + void* mappedAmbienceBuffer = nullptr; + if (mapBuffer( + ambienceClBuffer, ambienceFrame.slotDesc.nBytes, + CL_MAP_READ, mappedAmbienceBuffer)) + { + unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer); + } + } + } + clFlush(computeDevice->commandQueue); // Stop only collate kernel @@ -962,60 +956,6 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer() return true; } -bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer( - cl_map_flags mapFlags - ) -{ - return mapBuffer( - clAverageIntensityBuffer, averageIntensityBufferSize, mapFlags, - mappedAverageIntensityBuffer); -} - -bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer() -{ - unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer); - mappedAverageIntensityBuffer = nullptr; - return true; -} - -void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame( - std::optional> ambienceStimFrame, - uint32_t nSucceeded) -{ - if (!ambienceStimFrame.has_value()) { return; } - - std::shared_ptr ambienceBuff = - parent.ambienceStimulusBuffer.load(std::memory_order_acquire); - if (!ambienceBuff) { return; } - - const auto& ambienceCountComparator = ambienceBuff->ambienceCountComparator; - - // Read average intensity values from averageIntensityBuffer - float* averageIntensityAverages = reinterpret_cast( - averageIntensityBufferPtr); - - uint32_t ambiencePassbandCount = 0; - if (ambienceCountComparator.has_value()) - { - // Count frames whose average intensity matches the configured comparator. - for (uint32_t i = 0; i < nSucceeded; ++i) - { - float avg = averageIntensityAverages[i]; - if (ambienceCountComparator.value()(avg)) { - ++ambiencePassbandCount; - } - } - } - - // Write the ambience count to the ambienceStimFrame - StimulusFrame& ambienceFrame = ambienceStimFrame->get(); - using PcloudAmbienceStimVal = PcloudAmbienceStencil - ::PcloudAmbienceStimulusValue; - PcloudAmbienceStimVal* ambienceValue = reinterpret_cast< - PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr); - ambienceValue[0] = ambiencePassbandCount; -} - class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq : public sscl::PostedAsynchronousContinuation { @@ -1191,13 +1131,6 @@ public: uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load(); - // Produce ambience frame if ambience buffer is attached - if (context->ambienceStimFrame.has_value()) - { - engine.produceAmbienceStimulusFrame( - context->ambienceStimFrame, nSucceeded); - } - int returnMode = static_cast(engine.parent.device->currentReturnMode); size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram( returnMode); @@ -1218,46 +1151,10 @@ public: } } } + (void)highIntensityCount; #if 0 - // Print all averages above thresholds from average intensity buffer - if (context->ambienceStimFrame.has_value()) - { - std::shared_ptr ambienceBuff = - engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire); - const auto& ambienceCountComparator = - ambienceBuff->ambienceCountComparator; - uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold; - - float* averageIntensityAverages = reinterpret_cast( - engine.averageIntensityBufferPtr); - // Count frames that met the postrin threshold - uint32_t framesMetThreshold = 0; - for (uint32_t i = 0; i < nSucceeded; ++i) - { - float avg = averageIntensityAverages[i]; - if (ambienceCountComparator(avg)) { - ++framesMetThreshold; - } - } - - // Read the stimFrame value (ambience count) - StimulusFrame& ambienceFrame = context->ambienceStimFrame->get(); - using PcloudAmbienceStimVal = PcloudAmbienceStencil - ::PcloudAmbienceStimulusValue; - PcloudAmbienceStimVal* ambienceValue = reinterpret_cast< - PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr); - PcloudAmbienceStimVal stimFrameValue = ambienceValue[0]; - - bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold); - - std::cout << __func__ << ": frames met threshold=" << framesMetThreshold - << ", stimFrame value=" << stimFrameValue - << ", postrin threshold=" << postrinThreshold - << ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no") - << std::endl; - } - + // Legacy debug: ambience floats live in ambienceStimFrame after collate std::cout << __func__ << ": intensityRingBufferIndex=" << (context->intensityStimFrame.has_value() ? context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index bf7b5de..8440166 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -108,11 +108,6 @@ public: std::chrono::milliseconds getCompactKernelDuration() const; std::chrono::milliseconds getCollateKernelDuration() const; - // Produce ambience frame from average intensity data - void produceAmbienceStimulusFrame( - std::optional> ambienceStimFrame, - uint32_t nSucceeded); - private: PcloudStimulusProducer& parent; @@ -126,11 +121,9 @@ private: // OpenCL buffers (managed by ComputeManager) std::shared_ptr clAssemblyBufferClBuffer; std::shared_ptr clCollationBufferClBuffer; - std::shared_ptr clAverageIntensityBufferClBuffer; // Cached cl_mem handles for the device we're using cl_mem clAssemblyBuffer; cl_mem clCollationBuffer; - cl_mem clAverageIntensityBuffer; // State tracking sscl::SpinLock shouldAcceptRequestsLock; @@ -145,12 +138,9 @@ private: size_t assemblyBufferSize; void* collationBufferPtr; size_t collationBufferSize; - void* averageIntensityBufferPtr; - size_t averageIntensityBufferSize; // Mapped buffer pointers (for zero-copy synchronization) void* mappedAssemblyBuffer; void* mappedCollationBuffer; - void* mappedAverageIntensityBuffer; // Frame descriptor (cached from setup) std::shared_ptr frameAssemblyDesc; @@ -196,8 +186,6 @@ private: bool unmapAssemblyBuffer(); bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ); bool unmapCollationBuffer(); - bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ); - bool unmapAverageIntensityBuffer(); // Forward declaration for continuation class class CompactCollateAndMeshFrameReq; diff --git a/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h b/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h index cf40fad..0e99df7 100644 --- a/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h +++ b/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include "pcloudAmbienceQualeIfaceApi.h" -#include "lg1PcloudAmbienceStencil.h" namespace smo { namespace stim_buff { @@ -175,12 +173,12 @@ public: const StagingBuffer::IOEngineConstraints& outputEngineConstraints, const SmoCallbacks& callbacks, cl_mem_flags flags, - size_t nStencils_, size_t nDgramsPerFrame_) + size_t nDgramsPerFrame_) : StimulusBuffer( parent, deviceAttachmentSpec, histbuffMs, inputEngineConstraints, outputEngineConstraints, callbacks, flags), - nStencils(nStencils_) + nDgramsPerFrame(nDgramsPerFrame_) { intrin::validateNoForbiddenUnitlessIntrinParams( deviceAttachmentSpec->qualeIfaceApiParams); @@ -195,11 +193,6 @@ public: deviceAttachmentSpec); validateAmbienceIntrinComparatorConfig( intrinStatus, ambienceCountComparator); - - // Construct stencils and add to list (FIFO behavior) - for (size_t i = 0; i < nStencils; ++i) { - stencils.emplace_back(); - } } ~PcloudAmbienceStimulusBuffer() = default; @@ -223,8 +216,7 @@ public: uint32_t intrinInterestPercentage; uint32_t intrinInterestThreshold; std::optional ambienceCountComparator; - size_t nStencils; - std::list stencils; + size_t nDgramsPerFrame; }; } // namespace stim_buff diff --git a/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp b/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp index 1da12bf..415be2e 100644 --- a/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp +++ b/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "livoxGen1.h" #include "pcloudStimulusProducer.h" @@ -54,20 +53,14 @@ static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints( // framePadToNBytes (pointer size) static_cast(sysconf(_SC_PAGE_SIZE))); +/* IOEngineConstraints for PcloudAmbienceStimulusBuffer's StagingBuffer, which + * backs SpMcRingBuffer (one StimulusFrame per ring slot). Not the OpenCL + * collating engine's assembly/collation buffers — those use assemblyBuffer / + * collationBuffer above. slotPadToNBytes here is the byte size of each ringbuff + * slot: nDgramsPerStagingBufferFrame floats (set in ctor). + */ static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints( - // slotStartAlignmentByteVal (sizeof(void*)) - sizeof(PcloudAmbienceStencil::PcloudAmbienceStimulusValue), - // slotPadToNBytes (sizeof(PcloudAmbienceStimulusValue)) - sizeof(PcloudAmbienceStencil::PcloudAmbienceStimulusValue), - // frameStartAlignmentByteVal (page alignment) - static_cast(sysconf(_SC_PAGE_SIZE)), - // framePadToNBytes (page alignment) - static_cast(sysconf(_SC_PAGE_SIZE))); - -static StagingBuffer::IOEngineConstraints openClAverageIntensityConstraints( - // slotStartAlignmentByteVal (sizeof(float)) sizeof(float), - // slotPadToNBytes (sizeof(float)) sizeof(float), // frameStartAlignmentByteVal (page alignment) static_cast(sysconf(_SC_PAGE_SIZE)), @@ -96,12 +89,6 @@ collationBuffer( StagingBuffer::IOEngineConstraints::openClInputConstraints, nDgramsPerStagingBufferFrame), collationBufferMlockPinner(collationBuffer.makeMlockPinner()), -averageIntensityBuffer( - openClAverageIntensityConstraints, - openClAverageIntensityConstraints, - nDgramsPerStagingBufferFrame), -averageIntensityBufferMlockPinner( - averageIntensityBuffer.makeMlockPinner()), pcloudFrameDumper(deviceAttachmentSpec), tempStimulusFrameMem(0), tempStimulusFrame( @@ -111,6 +98,10 @@ tempStimulusFrame( sizeof(tempStimulusFrameMem)}, *smoHooksPtr, 0, SIZE_MAX) { + // See comment in openClAmbienceInputConstraints above. + openClAmbienceInputConstraints.slotPadToNBytes = + nDgramsPerStagingBufferFrame * sizeof(float); + if (smoHooksPtr->OptionParser_getOptions().verbose) { std::cout << __func__ << ": assembly buffer : " @@ -291,12 +282,10 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer( // Parse qualeIfaceApi to determine buffer type const std::string& qualeIfaceApi = deviceAttachmentSpec->qualeIfaceApi; - // Calculate nPointsPerDgram based on return mode - size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram( - static_cast(device->currentReturnMode)); - if (qualeIfaceApi == "mesh") { + size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram( + static_cast(device->currentReturnMode)); /* Calculate slotStrideNBytes: * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 3 */ @@ -318,6 +307,8 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer( } else if (qualeIfaceApi == "pcloudIntensity") { + size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram( + static_cast(device->currentReturnMode)); /* Calculate slotStrideNBytes: * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 1 */ @@ -340,27 +331,17 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer( } else if (qualeIfaceApi == "pcloudAmbience") { - // Parse n-stencils from qualeIfaceApiParams - const std::vector nStencilsParamNames = { - "n-stencils" - }; - int nStencilsInt = device::DeviceAttachmentSpec - ::parseOptionalParamAsIntWithSynonyms( - deviceAttachmentSpec->qualeIfaceApiParams, - nStencilsParamNames, 1); - size_t nStencils = static_cast(nStencilsInt); - - auto ambienceBuffer = std::make_shared( + auto ambienceStimBuff = std::make_shared( *this, deviceAttachmentSpec, histbuffMs, openClAmbienceInputConstraints, openClAmbienceInputConstraints, *smoHooksPtr, CL_MEM_READ_WRITE, - nStencils, this->nDgramsPerStagingBufferFrame); + this->nDgramsPerStagingBufferFrame); this->stop(); - addAttachedStimulusBufferIfNotExists(ambienceBuffer); - ambienceStimulusBuffer.store(ambienceBuffer, std::memory_order_release); + addAttachedStimulusBufferIfNotExists(ambienceStimBuff); + ambienceStimulusBuffer.store(ambienceStimBuff, std::memory_order_release); this->start(); - return ambienceBuffer; + return ambienceStimBuff; } else { diff --git a/stimBuffApis/livoxGen1/pcloudStimulusProducer.h b/stimBuffApis/livoxGen1/pcloudStimulusProducer.h index d6ea2c3..fc55faf 100644 --- a/stimBuffApis/livoxGen1/pcloudStimulusProducer.h +++ b/stimBuffApis/livoxGen1/pcloudStimulusProducer.h @@ -96,9 +96,6 @@ public: IoUringAssemblyEngine ioUringAssemblyEngine; StagingBuffer collationBuffer; std::unique_ptr collationBufferMlockPinner; - StagingBuffer averageIntensityBuffer; - std::unique_ptr - averageIntensityBufferMlockPinner; LivoxPcloudFrameDumper pcloudFrameDumper; size_t tempStimulusFrameMem; StimulusFrame tempStimulusFrame;