diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index d0fd3d5..6cd1f5b 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -30,7 +30,8 @@ currentKernelEvent(nullptr), assemblyBufferPtr(nullptr), assemblyBufferSize(0), collationBufferPtr(nullptr), -collationBufferSize(0) +collationBufferSize(0), +frameAssemblyDesc(nullptr) { } @@ -100,6 +101,16 @@ bool OpenClCollatingAndMeshingEngine::setup() collationBufferPtr = collationIov.iov_base; collationBufferSize = collationIov.iov_len; + // Get FrameAssemblyDesc from assembly buffer + frameAssemblyDesc = static_cast>( + parent.assemblyBuffer); + + if (!frameAssemblyDesc || frameAssemblyDesc->slots.empty()) + { + std::cerr << __func__ << ": invalid frame descriptor" << std::endl; + goto cleanup; + } + // Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy clAssemblyBuffer = clCreateBuffer( context, @@ -206,6 +217,7 @@ void OpenClCollatingAndMeshingEngine::finalize() assemblyBufferSize = 0; collationBufferPtr = nullptr; collationBufferSize = 0; + frameAssemblyDesc = nullptr; } // Static callback for OpenCL event @@ -226,7 +238,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback( } } -void OpenClCollatingAndMeshingEngine::start( +bool OpenClCollatingAndMeshingEngine::start( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, collateFrameReqCbFn callback, uint32_t nSucceeded) @@ -234,14 +246,14 @@ void OpenClCollatingAndMeshingEngine::start( if (!isSetup) { std::cerr << __func__ << ": engine not set up" << std::endl; - return; + return false; } if (isRunning) { std::cerr << __func__ << ": already running, call stop() first" << std::endl; - return; + return false; } // Validate buffers match what we set up @@ -249,38 +261,24 @@ void OpenClCollatingAndMeshingEngine::start( struct iovec collationIov = collationBuff.getClEngineIovec(); if (assemblyIov.iov_base != assemblyBufferPtr - || assemblyIov.iov_len != assemblyBufferSize) - { - std::cerr << __func__ << ": assembly buffer mismatch" << std::endl; - return; - } - - if (collationIov.iov_base != collationBufferPtr + || assemblyIov.iov_len != assemblyBufferSize + || collationIov.iov_base != collationBufferPtr || collationIov.iov_len != collationBufferSize) { - std::cerr << __func__ << ": collation buffer mismatch" << std::endl; - return; + throw std::runtime_error( + std::string(__func__) + ": buffer mismatch - buffers have changed"); } // Store the caller's callback collateFrameReqCb = callback; - // Get FrameAssemblyDesc from assembly buffer - std::shared_ptr frameDesc = - static_cast>(assemblyBuff); - if (!frameDesc || frameDesc->slots.empty()) - { - std::cerr << __func__ << ": invalid frame descriptor" << std::endl; - return; - } - // Set up kernel arguments for slotCompactor if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) { - return; + return false; } // Set up kernel arguments for collateDgrams if (!setupCollateDgramsArgs(assemblyBuff)) { - return; + return false; } // Enqueue slotCompactor kernel execution (single work item for sequential processing) @@ -293,7 +291,7 @@ void OpenClCollatingAndMeshingEngine::start( { std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " << err << std::endl; - return; + return false; } // Set up callback using static member function @@ -310,7 +308,7 @@ void OpenClCollatingAndMeshingEngine::start( << std::endl; clReleaseEvent(currentKernelEvent); currentKernelEvent = nullptr; - return; + return false; } // TODO: Set up timeout timer in continuation class @@ -319,6 +317,7 @@ void OpenClCollatingAndMeshingEngine::start( isRunning = true; // start() is synchronous - it returns immediately after setting up kernel execution // The callback will be invoked when the kernel completes + return true; } bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel( @@ -398,19 +397,10 @@ bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernels() bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( StagingBuffer& assemblyBuff, uint32_t nSucceeded) { - // Get FrameAssemblyDesc from assembly buffer - std::shared_ptr frameDesc = - static_cast>(assemblyBuff); - if (!frameDesc || frameDesc->slots.empty()) - { - std::cerr << __func__ << ": invalid frame descriptor" << std::endl; - return false; - } - // Extract parameters for slotCompactor kernel - uint32_t numSlots = static_cast(frameDesc->numSlots); + uint32_t numSlots = static_cast(frameAssemblyDesc->numSlots); uint32_t slotStride = static_cast(assemblyBuff.slotStrideNBytes); - uint32_t slotSize = static_cast(frameDesc->slotSizeBytes); + uint32_t slotSize = static_cast(frameAssemblyDesc->slotSizeBytes); uint32_t firstSlotOffset = static_cast( assemblyBuff.firstSlotOffsetNBytes); uint32_t nSucceededUint = static_cast(nSucceeded); @@ -477,15 +467,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( StagingBuffer& assemblyBuff) { - // Get FrameAssemblyDesc from assembly buffer - std::shared_ptr frameDesc = - static_cast>(assemblyBuff); - if (!frameDesc || frameDesc->slots.empty()) - { - std::cerr << __func__ << ": invalid frame descriptor" << std::endl; - return false; - } - // Extract parameters for collateDgrams kernel uint32_t slotStride = static_cast(assemblyBuff.slotStrideNBytes); uint32_t firstSlotOffset = static_cast( @@ -500,7 +481,8 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( int returnMode = static_cast(parent.device->currentReturnMode); uint32_t nPointsPerSlot = static_cast( IoUringAssemblyEngine::computePointsPerDgram(returnMode)); - uint32_t nDgramsPerFrame = static_cast(frameDesc->numSlots); + uint32_t nDgramsPerFrame = static_cast( + frameAssemblyDesc->numSlots); // Set kernel arguments for collateDgrams cl_int err; diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index c06cdce..4648b60 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -9,6 +9,7 @@ #define CL_TARGET_OPENCL_VERSION 300 #include #include "stagingBuffer.h" +#include "frameAssemblyDesc.h" namespace smo { namespace stim_buff { @@ -37,7 +38,7 @@ public: // Callback function type for collateFrameReq typedef std::function collateFrameReqCbFn; - void start( + bool start( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, collateFrameReqCbFn callback, uint32_t nSucceeded); @@ -71,6 +72,9 @@ private: void* collationBufferPtr; size_t collationBufferSize; + // Frame descriptor (cached from setup) + std::shared_ptr frameAssemblyDesc; + // Callback storage collateFrameReqCbFn collateFrameReqCb;