From ce690bc3f40a2680960d98a63a8ca221d0b530a2 Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Sun, 23 Nov 2025 07:20:55 -0400 Subject: [PATCH] PcloudStimProducer,OClCollMeshEngn: Produce ambience stim feature The collation kernel now also produces the ambience stim feature values into the ambience stimbuff frames. --- stimBuffApis/livoxGen1/collateDgrams.cl | 38 +++++ .../openClCollatingAndMeshingEngine.cpp | 141 +++++++++++++++--- .../openClCollatingAndMeshingEngine.h | 6 +- .../livoxGen1/pcloudAmbienceStimulusBuffer.h | 10 +- .../livoxGen1/pcloudStimulusProducer.cpp | 45 +++++- 5 files changed, 214 insertions(+), 26 deletions(-) diff --git a/stimBuffApis/livoxGen1/collateDgrams.cl b/stimBuffApis/livoxGen1/collateDgrams.cl index 29dc2c2..fb4a61f 100644 --- a/stimBuffApis/livoxGen1/collateDgrams.cl +++ b/stimBuffApis/livoxGen1/collateDgrams.cl @@ -25,6 +25,8 @@ __kernel void collate( __global uchar* assembly, __global float* collation, __global float* intensityBuffer, + __global uint* ambienceBuffer, + uint ambienceHighVal, uint slotStride, uint nPointsPerSlot, uint nDgramsPerFrame) @@ -53,6 +55,9 @@ __kernel void collate( uint intensityBaseOffset = slotIndex * nPointsPerSlot; DBG_PRINTF("Running kernel: about to process points in slot.\n"); + // Initialize ambience counter for this work item + uint ambienceCount = 0; + // Process based on data type using nested ifs (outer) with loops (inner) if (dataType == 0) { @@ -92,6 +97,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + i] = intensity; } + // Count high intensity values for ambience buffer + if (intensity >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer } } @@ -134,6 +143,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + i] = intensity; } + // Count high intensity values for ambience buffer + if (intensity >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer } } @@ -177,6 +190,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity1; } + // Count high intensity values for ambience buffer + if (intensity1 >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer ++pointIndex; @@ -208,6 +225,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity2; } + // Count high intensity values for ambience buffer + if (intensity2 >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer ++pointIndex; } @@ -252,6 +273,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity1; } + // Count high intensity values for ambience buffer + if (intensity1 >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer ++pointIndex; @@ -283,6 +308,10 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity2; } + // Count high intensity values for ambience buffer + if (intensity2 >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer ++pointIndex; @@ -314,9 +343,18 @@ __kernel void collate( if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity3; } + // Count high intensity values for ambience buffer + if (intensity3 >= ambienceHighVal) { + ++ambienceCount; + } // Don't write intensity to collation buffer ++pointIndex; } } // Unsupported data types are silently ignored + + // Write ambience count for this work item (once at the end) + if (ambienceBuffer != NULL) { + ambienceBuffer[slotIndex] = ambienceCount; + } } diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 65e28a7..65f5ef1 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -165,7 +166,7 @@ void OpenClCollatingAndMeshingEngine::finalize() // Complete any running kernels if (compactIsRunning) { compactKernelComplete(true); } - if (collateIsRunning) { collateKernelComplete(std::nullopt, true); } + if (collateIsRunning) { collateKernelComplete(std::nullopt, std::nullopt, true); } // Release OpenCL buffers via smo hooks if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer) @@ -326,6 +327,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel( bool OpenClCollatingAndMeshingEngine::startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, collateKernelCbFn callback) { // Store the caller's callback @@ -346,8 +348,12 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( }; // Setup args callable - auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() { - return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame); + auto setupArgs = [ + this, &assemblyBuff, intensityStimFrame, ambienceStimFrame + ]() + { + return setupCollateDgramsArgs( + assemblyBuff, intensityStimFrame, ambienceStimFrame); }; /** EXPLANATION: @@ -385,7 +391,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( if (intensityClBuffer) { void* mappedIntensityBuffer = nullptr; - if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes, + if (!mapBuffer( + intensityClBuffer, intensityFrame.slotDesc.nBytes, CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer)) { std::cerr << __func__ << ": failed to map intensity buffer" @@ -397,6 +404,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( } } + // Map/unmap ambience buffer if it exists + if (ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = ambienceStimFrame->get(); + cl_mem ambienceClBuffer = ambienceFrame.clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); + + if (ambienceClBuffer) + { + void* mappedAmbienceBuffer = nullptr; + if (!mapBuffer( + ambienceClBuffer, ambienceFrame.slotDesc.nBytes, + CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer)) + { + std::cerr << __func__ << ": failed to map ambience buffer" + << std::endl; + return false; + } + + unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer); + } + } + // Calculate global work size (just num slots in the frame) size_t globalWorkSize = static_cast(frameAssemblyDesc->numSlots); @@ -552,7 +582,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( StagingBuffer& assemblyBuff, - std::optional> intensityStimFrame) + std::optional> intensityStimFrame, + std::optional> ambienceStimFrame) { // Extract parameters for collateDgrams kernel uint32_t slotStride = static_cast(assemblyBuff.slotStrideNBytes); @@ -602,7 +633,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride); + // Set ambience buffer argument (arg 3) + cl_mem ambienceClBuffer = nullptr; + if (ambienceStimFrame.has_value()) + { + ambienceClBuffer = ambienceStimFrame->get().clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); + } + err = clSetKernelArg(collateKernel, 3, sizeof(cl_mem), &ambienceClBuffer); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 3: " << err @@ -610,7 +648,12 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot); + // Set ambienceHighVal argument (arg 4) + uint32_t ambienceHighVal = 0; + if (ambienceStimFrame.has_value() && parent.ambienceStimulusBuffer) { + ambienceHighVal = parent.ambienceStimulusBuffer->ambienceHighVal; + } + err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &ambienceHighVal); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 4: " << err @@ -618,10 +661,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame); + err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &slotStride); if (err != CL_SUCCESS) { - std::cerr << __func__ << ": failed to set kernel arg 5: " << err + std::cerr << __func__ << ": failed to set kernel arg 3: " << err + << std::endl; + return false; + } + + err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nPointsPerSlot); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 6: " << err + << std::endl; + return false; + } + + err = clSetKernelArg(collateKernel, 7, sizeof(uint32_t), &nDgramsPerFrame); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 7: " << err << std::endl; return false; } @@ -670,6 +729,7 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing) void OpenClCollatingAndMeshingEngine::collateKernelComplete( std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, bool isFinalizing) { cl_map_flags mapFlags; @@ -694,7 +754,8 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete( if (intensityClBuffer) { void* mappedIntensityBuffer = nullptr; - if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes, + if (mapBuffer( + intensityClBuffer, intensityFrame.slotDesc.nBytes, CL_MAP_READ, mappedIntensityBuffer)) { unmapBuffer(intensityClBuffer, mappedIntensityBuffer); @@ -702,6 +763,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete( } } + // Map/unmap ambience buffer if it exists + if (ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = ambienceStimFrame->get(); + cl_mem ambienceClBuffer = ambienceFrame.clBuffer + ->getAssociatedBufferHandleForDevice(computeDevice); + + if (ambienceClBuffer) + { + void* mappedAmbienceBuffer = nullptr; + if (mapBuffer( + ambienceClBuffer, ambienceFrame.slotDesc.nBytes, + CL_MAP_READ, mappedAmbienceBuffer)) + { + unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer); + } + } + } + clFlush(computeDevice->commandQueue); // Stop only collate kernel @@ -833,6 +913,7 @@ private: AsynchronousLoop frameAssemblyResult; StimulusFrame& stimulusFrame; std::optional> intensityStimFrame; + std::optional> ambienceStimFrame; public: CompactCollateAndMeshFrameReq( @@ -840,13 +921,15 @@ public: AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame_, std::optional> intensityStimFrame_, + std::optional> ambienceStimFrame_, const std::shared_ptr& caller, Callback cb) : PostedAsynchronousContinuation( caller, cb), engine(engine_), frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_), - intensityStimFrame(intensityStimFrame_) + intensityStimFrame(intensityStimFrame_), + ambienceStimFrame(ambienceStimFrame_) {} public: @@ -941,7 +1024,7 @@ public: bool success = engine.startCollateKernel( engine.parent.assemblyBuffer, engine.parent.collationBuffer, - context->intensityStimFrame, + context->intensityStimFrame, context->ambienceStimFrame, std::bind( &CompactCollateAndMeshFrameReq ::compactCollateAndMeshFrameReq4_collateDone_maybePosted, @@ -950,7 +1033,9 @@ public: if (!success) { - engine.collateKernelComplete(context->intensityStimFrame); + engine.collateKernelComplete( + context->intensityStimFrame, context->ambienceStimFrame); + callOriginalCallback(false); return; } @@ -979,7 +1064,9 @@ public: * Therefore it's finalize()'s responsibility to ensure that it properly * completes/cleans up any in-flight operations. */ - engine.collateKernelComplete(context->intensityStimFrame); + engine.collateKernelComplete( + context->intensityStimFrame, context->ambienceStimFrame); + // Record collate kernel end time engine.collateKernelEndTime = std::chrono::high_resolution_clock::now(); @@ -1015,12 +1102,29 @@ public: } } - std::cout << __func__ << ": ringBufferIndex=" - << context->intensityStimFrame->get().ringBufferIndex + // Sum up ambience counts from ambience buffer + uint32_t ambienceCountSum = 0; + if (context->ambienceStimFrame.has_value()) + { + StimulusFrame& ambienceFrame = context->ambienceStimFrame->get(); + uint32_t* ambienceCounts = reinterpret_cast(ambienceFrame.slotDesc.vaddr); + for (uint32_t i = 0; i < nSucceeded; ++i) + { + ambienceCountSum += ambienceCounts[i]; + } + } + + std::cout << __func__ << ": intensityRingBufferIndex=" + << (context->intensityStimFrame.has_value() ? + context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX) + << ", ambienceRingBufferIndex=" + << (context->ambienceStimFrame.has_value() ? + context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX) << ", pointsPerDgram=" << pointsPerDgram << ", nSucceeded=" << nSucceeded << ", totalPoints=" << totalPoints - << ", highIntensityCount=" << highIntensityCount << std::endl; + << ", highIntensityCount=" << highIntensityCount + << ", ambienceCountSum=" << ambienceCountSum << std::endl; callOriginalCallback(success); } @@ -1029,6 +1133,7 @@ public: void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq( AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, Callback callback) { { @@ -1042,7 +1147,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq( auto caller = smoHooksPtr->ComponentThread_getSelf(); auto request = std::make_shared( - *this, asyncLoop, stimulusFrame, intensityStimFrame, + *this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame, caller, std::move(callback)); diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index 8eb63c7..be96142 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -50,6 +50,7 @@ public: void compactCollateAndMeshFrameReq( AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, Callback callback); private: @@ -63,11 +64,13 @@ private: bool startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, collateKernelCbFn callback); void compactKernelComplete(bool isFinalizing=false); void collateKernelComplete( std::optional> intensityStimFrame, + std::optional> ambienceStimFrame, bool isFinalizing=false); bool stop(); @@ -138,7 +141,8 @@ private: StagingBuffer& assemblyBuff, uint32_t nSucceeded); bool setupCollateDgramsArgs( StagingBuffer& assemblyBuff, - std::optional> intensityStimFrame); + std::optional> intensityStimFrame, + std::optional> ambienceStimFrame); // Generic buffer mapping/unmapping for zero-copy synchronization bool mapBuffer( diff --git a/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h b/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h index c5c41c2..49c9056 100644 --- a/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h +++ b/stimBuffApis/livoxGen1/pcloudAmbienceStimulusBuffer.h @@ -2,6 +2,7 @@ #define _LIVOX_GEN1_PCLOUD_AMBIENCE_STIMULUS_BUFFER_H #include +#include #include #include @@ -25,11 +26,13 @@ public: const StagingBuffer::IOEngineConstraints& inputEngineConstraints, const StagingBuffer::IOEngineConstraints& outputEngineConstraints, const SmoCallbacks& callbacks, - cl_mem_flags flags) + cl_mem_flags flags, + uint32_t ambienceHighVal_) : StimulusBuffer( parent, deviceAttachmentSpec, histbuffMs, inputEngineConstraints, outputEngineConstraints, - callbacks, flags) + callbacks, flags), + ambienceHighVal(ambienceHighVal_) {} ~PcloudAmbienceStimulusBuffer() = default; @@ -39,6 +42,9 @@ public: PcloudAmbienceStimulusBuffer& operator=(const PcloudAmbienceStimulusBuffer&) = delete; PcloudAmbienceStimulusBuffer(PcloudAmbienceStimulusBuffer&&) = default; PcloudAmbienceStimulusBuffer& operator=(PcloudAmbienceStimulusBuffer&&) = default; + +public: + uint32_t ambienceHighVal; }; } // namespace stim_buff diff --git a/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp b/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp index 5e90db8..34c3100 100644 --- a/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp +++ b/stimBuffApis/livoxGen1/pcloudStimulusProducer.cpp @@ -56,7 +56,7 @@ static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints( // slotStartAlignmentByteVal (page alignment) static_cast(sysconf(_SC_PAGE_SIZE)), // slotPadToNBytes: This is dynamically calculated based on the return mode. - sizeof(float), + sizeof(uint32_t), // frameStartAlignmentByteVal (page alignment) static_cast(sysconf(_SC_PAGE_SIZE)), // framePadToNBytes (pointer size) @@ -295,18 +295,29 @@ std::cout << __func__ << ": $$$$$$$ Created PcloudIntensityStimulusBuffer" << st } else if (qualeIfaceApi == "pcloudAmbience") { + // Parse ambienceHighVal from stimBuffApiParams + const std::vector ambienceHighValParamNames = { + "high-value", + "high-val" + }; + int ambienceHighValInt = device::DeviceAttachmentSpec + ::parseOptionalParamAsIntWithSynonyms( + deviceAttachmentSpec->stimBuffApiParams, + ambienceHighValParamNames, 116); + uint32_t ambienceHighVal = static_cast(ambienceHighValInt); + /* Calculate slotStrideNBytes: - * nDgramsPerStagingBufferFrame * sizeof(float) + * nDgramsPerStagingBufferFrame * sizeof(uint32_t) */ size_t slotStrideNBytes = this->nDgramsPerStagingBufferFrame - * sizeof(float); + * sizeof(uint32_t); // Reuse openClAmbienceInputConstraints, only modify slotPadToNBytes openClAmbienceInputConstraints.slotPadToNBytes = slotStrideNBytes; auto ambienceBuffer = std::make_shared( *this, deviceAttachmentSpec, histbuffMs, openClAmbienceInputConstraints, openClAmbienceInputConstraints, - *smoHooksPtr, CL_MEM_READ_WRITE); + *smoHooksPtr, CL_MEM_READ_WRITE, ambienceHighVal); std::cout << __func__ << ": $$$$$$$ Created PcloudAmbienceStimulusBuffer" << std::endl; this->stop(); @@ -337,6 +348,7 @@ private: AsynchronousLoop frameAssemblyResult; StimulusFrame& stimulusFrame; std::optional> intensityStimFrame; + std::optional> ambienceStimFrame; public: ProduceFrameReq( @@ -412,8 +424,27 @@ public: context->intensityStimFrame = std::nullopt; } + // Check if ambience buffer is attached and acquire frame if so + if (pcloudProducer.ambienceStimulusBuffer) + { + size_t ambienceRingbuffIndex = pcloudProducer + .ambienceStimulusBuffer->ringBuffer.getIndexToProduceInto(); + + StimulusFrame& ambienceStimFrame = pcloudProducer + .ambienceStimulusBuffer->ringBuffer.getDataAtSlot( + ambienceRingbuffIndex); + + ambienceStimFrame.lock.writeAcquire(); + context->ambienceStimFrame = std::make_optional( + std::ref(ambienceStimFrame)); + } + else { + context->ambienceStimFrame = std::nullopt; + } + pcloudProducer.openClCollatingAndMeshingEngine.compactCollateAndMeshFrameReq( - loop, stimulusFrame, context->intensityStimFrame, + loop, stimulusFrame, + context->intensityStimFrame, context->ambienceStimFrame, {context, std::bind( &ProduceFrameReq::produceFrameReq3_compactCollateDone, context.get(), context, @@ -428,6 +459,10 @@ public: if (context->intensityStimFrame.has_value()) { context->intensityStimFrame->get().lock.writeRelease(); } + // Release ambience frame if it was used + if (context->ambienceStimFrame.has_value()) { + context->ambienceStimFrame->get().lock.writeRelease(); + } SpinLock::Guard lock(pcloudProducer.shouldContinueLock); if (!pcloudProducer.shouldContinue)