From eedeb4b803b83a17e89243a9c905bb14e0403c2c Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Mon, 10 Nov 2025 00:58:48 -0400 Subject: [PATCH] OClCollMeshEngn: Add method compactCollateAndMeshFrameReq This method takes an input assembly buffer and selects which OpenCL kernels need to be executed on that buffer to transform the input data into the eventual output StimulusFrame for the current timeslice period. --- .../openClCollatingAndMeshingEngine.cpp | 143 +++++++++++++++++- .../openClCollatingAndMeshingEngine.h | 12 ++ 2 files changed, 152 insertions(+), 3 deletions(-) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 10b0051..f7eb1c2 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -4,6 +4,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include "livoxGen1.h" #include "openClCollatingAndMeshingEngine.h" #include "pcloudStimulusBuffer.h" #include "openClKernels.h" @@ -234,7 +240,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( { return; } // Stop the compact kernel - engine->stopCompactKernel(); +// engine->stopCompactKernel(); /* NOTE(review): with this call disabled, the kernel is stopped by whoever receives the completion callback (the continuation added below calls stopCompactKernel in its compactDone step) - verify any other startCompactKernel callers, then delete this dead line instead of leaving it commented out. */ // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) @@ -255,7 +261,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( { return; } // Stop the collate kernel - engine->stopCollateKernel(); +// engine->stopCollateKernel(); /* NOTE(review): same as for the compact kernel - the continuation added below calls stopCollateKernel in its collateDone step; verify any other startCollateKernel callers, then delete this dead line. */ // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) @@ -592,6 +598,137 @@ void OpenClCollatingAndMeshingEngine::stopCollateKernel() collateKernelCb = [](cl_int){}; } +/* Continuation for one compactCollateAndMeshFrameReq() call. Holds a reference to the engine, a copy of the AsynchronousLoop passed by the caller and a reference to the StimulusFrame, and drives the steps doCompact -> compactDone -> doCollate -> collateDone before invoking the original callback. NOTE(review): engine and stimulusFrame are held by reference, so both presumably must outlive the request - confirm with callers. */ class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq +: public PostedAsynchronousContinuation +{ +private: 
+ OpenClCollatingAndMeshingEngine& engine; + AsynchronousLoop frameAssemblyResult; + StimulusFrame& stimulusFrame; + +public: + CompactCollateAndMeshFrameReq( + OpenClCollatingAndMeshingEngine& engine_, + AsynchronousLoop& asyncLoop, + StimulusFrame& stimulusFrame_, + const std::shared_ptr& caller, + Callback cb) + : PostedAsynchronousContinuation( + caller, cb), + engine(engine_), + frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_) + {} + +public: + /* Forwards the success flag and std::ref(stimulusFrame) to callOriginalCb. */ void callOriginalCallback(bool success) + { callOriginalCb(success, std::ref(stimulusFrame)); } + +public: + /* Step 1: starts the compact kernel on engine.parent.assemblyBuffer, passing frameAssemblyResult.nSucceeded, with step 2 bound as the completion callback (the bound shared_ptr copy keeps this request alive while the callback is stored). If startCompactKernel returns false the original callback is invoked with failure. */ void compactCollateAndMeshFrameReq1_doCompact_posted( + std::shared_ptr context) + { + bool success = engine.startCompactKernel( + engine.parent.assemblyBuffer, + static_cast(context->frameAssemblyResult.nSucceeded.load()), + std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq2_compactDone_posted, + context.get(), context, + std::placeholders::_1)); + + if (!success) + { + callOriginalCallback(false); + return; + } + } + + /* Step 2: completion of the compact kernel. Stops the compact kernel; when compactStatus is not CL_SUCCESS the original callback is invoked with failure, otherwise the chain continues with step 3. */ void compactCollateAndMeshFrameReq2_compactDone_posted( + std::shared_ptr context, + cl_int compactStatus) + { + engine.stopCompactKernel(); + + // If compact failed, call callback directly with failure + if (compactStatus != CL_SUCCESS) + { + callOriginalCallback(false); + return; + } + + // Print first 4 bytes of each slot /* NOTE(review): this runs for every slot on the normal success path and looks like leftover debug output - confirm it is intended before merging. */ + if (engine.frameAssemblyDesc) + { + for (size_t i = 0; i < engine.frameAssemblyDesc->numSlots; ++i) { + engine.parent.ioUringAssemblyEngine.printSlotBytes(i, 4); + } + } + + context->compactCollateAndMeshFrameReq3_doCollate_posted(context); + } + + /* Step 3: starts the collate kernel with engine.parent.assemblyBuffer and engine.parent.collationBuffer, with step 4 bound as the completion callback; invokes the original callback with failure if startCollateKernel returns false. This is also the first step when compaction is skipped. */ void compactCollateAndMeshFrameReq3_doCollate_posted( + std::shared_ptr context) + { + bool success = engine.startCollateKernel( + engine.parent.assemblyBuffer, engine.parent.collationBuffer, + std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq4_collateDone_maybePosted, + context.get(), context, + std::placeholders::_1)); + + if (!success) + { + 
callOriginalCallback(false); + return; + } + } + + /* Step 4: completion of the collate kernel. Stops the collate kernel and invokes the original callback with success only when collateStatus is CL_SUCCESS. */ void compactCollateAndMeshFrameReq4_collateDone_maybePosted( + [[maybe_unused]] std::shared_ptr context, + cl_int collateStatus) + { + engine.stopCollateKernel(); + + bool success = (collateStatus == CL_SUCCESS); + callOriginalCallback(success); + } +}; + +/* Builds a CompactCollateAndMeshFrameReq for this call and posts its first step to the io_service of parent.device->componentThread: step 1 (compact) when IoUringAssemblyEngine::compactionIsNeeded(nSucceeded, nTotal) returns true, step 3 (collate) otherwise. NOTE(review): parent.device and componentThread are dereferenced here without the null checks used in the kernel event callbacks of this file - confirm they cannot be null at this point. */ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq( + AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, + Callback callback) +{ + auto caller = smoHooksPtr->ComponentThread_getSelf(); + auto request = std::make_shared( + *this, asyncLoop, stimulusFrame, + caller, + std::move(callback)); + + // Check if compaction is needed + bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded( + asyncLoop.nSucceeded.load(), asyncLoop.nTotal); + + // Start with compaction if needed, then chain to collation + if (needsCompaction) + { + parent.device->componentThread->getIoService().post( + STC(std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq1_doCompact_posted, + request.get(), request))); + } + else + { + // Skip compaction, go straight to collation + parent.device->componentThread->getIoService().post( + STC(std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq3_doCollate_posted, + request.get(), request))); + } +} + } // namespace stim_buff } // namespace smo - diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index e747897..ecf4825 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -10,6 +10,9 @@ #include #define CL_TARGET_OPENCL_VERSION 300 #include +#include +#include +#include #include "stagingBuffer.h" #include "frameAssemblyDesc.h" @@ -40,6 +43,8 @@ public: // Callback function types typedef std::function compactKernelCbFn; typedef std::function collateKernelCbFn; + typedef std::function + 
compactCollateAndMeshFrameReqCbFn; bool startCompactKernel( StagingBuffer& assemblyBuff, uint32_t nSucceeded, @@ -52,6 +57,10 @@ public: void stopCollateKernel(); void stop(); + /* Asynchronous request: runs the compact kernel when compaction is needed, then the collate kernel, and reports the outcome through callback. */ void compactCollateAndMeshFrameReq( + AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, + Callback callback); + private: PcloudStimulusBuffer& parent; @@ -106,6 +115,9 @@ private: StagingBuffer& assemblyBuff, uint32_t nSucceeded); bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff); + // Forward declaration for continuation class + class CompactCollateAndMeshFrameReq; + // Unified kernel start function template bool startKernel(