diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 10b0051..f7eb1c2 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -4,6 +4,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include "livoxGen1.h" #include "openClCollatingAndMeshingEngine.h" #include "pcloudStimulusBuffer.h" #include "openClKernels.h" @@ -234,7 +240,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( { return; } // Stop the compact kernel - engine->stopCompactKernel(); +// engine->stopCompactKernel(); // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) @@ -255,7 +261,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( { return; } // Stop the collate kernel - engine->stopCollateKernel(); +// engine->stopCollateKernel(); // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) @@ -592,6 +598,137 @@ void OpenClCollatingAndMeshingEngine::stopCollateKernel() collateKernelCb = [](cl_int){}; } +class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq +: public PostedAsynchronousContinuation +{ +private: + OpenClCollatingAndMeshingEngine& engine; + AsynchronousLoop frameAssemblyResult; + StimulusFrame& stimulusFrame; + +public: + CompactCollateAndMeshFrameReq( + OpenClCollatingAndMeshingEngine& engine_, + AsynchronousLoop& asyncLoop, + StimulusFrame& stimulusFrame_, + const std::shared_ptr& caller, + Callback cb) + : PostedAsynchronousContinuation( + caller, cb), + engine(engine_), + frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_) + {} + +public: + void callOriginalCallback(bool success) + { callOriginalCb(success, std::ref(stimulusFrame)); } + +public: + void compactCollateAndMeshFrameReq1_doCompact_posted( + std::shared_ptr context) + { + bool success = engine.startCompactKernel( + engine.parent.assemblyBuffer, + static_cast(context->frameAssemblyResult.nSucceeded.load()), + std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq2_compactDone_posted, + context.get(), context, + std::placeholders::_1)); + + if (!success) + { + callOriginalCallback(false); + return; + } + } + + void compactCollateAndMeshFrameReq2_compactDone_posted( + std::shared_ptr context, + cl_int compactStatus) + { + engine.stopCompactKernel(); + + // If compact failed, call callback directly with failure + if (compactStatus != CL_SUCCESS) + { + callOriginalCallback(false); + return; + } + + // Print first 4 bytes of each slot + if (engine.frameAssemblyDesc) + { + for (size_t i = 0; i < engine.frameAssemblyDesc->numSlots; ++i) { + engine.parent.ioUringAssemblyEngine.printSlotBytes(i, 4); + } + } + + context->compactCollateAndMeshFrameReq3_doCollate_posted(context); + } + + void compactCollateAndMeshFrameReq3_doCollate_posted( + std::shared_ptr context) + { + bool success = engine.startCollateKernel( + engine.parent.assemblyBuffer, engine.parent.collationBuffer, + std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq4_collateDone_maybePosted, + context.get(), context, + std::placeholders::_1)); + + if (!success) + { + callOriginalCallback(false); + return; + } + } + + void compactCollateAndMeshFrameReq4_collateDone_maybePosted( + [[maybe_unused]] std::shared_ptr context, + cl_int collateStatus) + { + engine.stopCollateKernel(); + + bool success = (collateStatus == CL_SUCCESS); + callOriginalCallback(success); + } +}; + +void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq( + AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, + Callback callback) +{ + auto caller = smoHooksPtr->ComponentThread_getSelf(); + auto request = std::make_shared( + *this, asyncLoop, stimulusFrame, + caller, + std::move(callback)); + + // Check if compaction is needed + bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded( + asyncLoop.nSucceeded.load(), asyncLoop.nTotal); + + // Start with compaction if needed, then chain to collation + if (needsCompaction) + { + parent.device->componentThread->getIoService().post( + STC(std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq1_doCompact_posted, + request.get(), request))); + } + else + { + // Skip compaction, go straight to collation + parent.device->componentThread->getIoService().post( + STC(std::bind( + &CompactCollateAndMeshFrameReq + ::compactCollateAndMeshFrameReq3_doCollate_posted, + request.get(), request))); + } +} + } // namespace stim_buff } // namespace smo - diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index e747897..ecf4825 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -10,6 +10,9 @@ #include #define CL_TARGET_OPENCL_VERSION 300 #include +#include +#include +#include #include "stagingBuffer.h" #include "frameAssemblyDesc.h" @@ -40,6 +43,8 @@ public: // Callback function types typedef std::function compactKernelCbFn; typedef std::function collateKernelCbFn; + typedef std::function + compactCollateAndMeshFrameReqCbFn; bool startCompactKernel( StagingBuffer& assemblyBuff, uint32_t nSucceeded, @@ -52,6 +57,10 @@ public: void stopCollateKernel(); void stop(); + void compactCollateAndMeshFrameReq( + AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, + Callback callback); + private: PcloudStimulusBuffer& parent; @@ -106,6 +115,9 @@ private: StagingBuffer& assemblyBuff, uint32_t nSucceeded); bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff); + // Forward declaration for continuation class + class CompactCollateAndMeshFrameReq; + // Unified kernel start function template bool startKernel(