diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 7ab6161..3d452b0 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -26,7 +26,7 @@ isSetup(false), clAssemblyBuffer(nullptr), clCollationBuffer(nullptr), isRunning(false), -currentKernelEvent(nullptr), +currentCompactKernelEvent(nullptr), currentCollateKernelEvent(nullptr), assemblyBufferPtr(nullptr), assemblyBufferSize(0), collationBufferPtr(nullptr), @@ -139,8 +139,7 @@ bool OpenClCollatingAndMeshingEngine::setup() } // Compile and prepare both kernels - if (!compileAndPrepareKernels()) - { + if (!compileAndPrepareKernels()) { goto cleanup; } @@ -212,7 +211,8 @@ void OpenClCollatingAndMeshingEngine::finalize() platform = nullptr; isSetup = false; isRunning = false; - currentKernelEvent = nullptr; + currentCompactKernelEvent = nullptr; + currentCollateKernelEvent = nullptr; assemblyBufferPtr = nullptr; assemblyBufferSize = 0; collationBufferPtr = nullptr; @@ -222,7 +222,7 @@ void OpenClCollatingAndMeshingEngine::finalize() // Static callback for compact kernel event void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( - cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) + cl_event /*event*/, cl_int event_command_exec_status, void* user_data) { OpenClCollatingAndMeshingEngine* engine = static_cast(user_data); @@ -234,13 +234,13 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( if (engine->parent.device && engine->parent.device->componentThread) { engine->parent.device->componentThread->getIoService().post( - engine->compactKernelCb); + std::bind(engine->compactKernelCb, event_command_exec_status)); } } // Static callback for collate kernel event void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( - cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) + cl_event /*event*/, cl_int event_command_exec_status, void* user_data) { OpenClCollatingAndMeshingEngine* engine = static_cast(user_data); @@ -252,7 +252,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( if (engine->parent.device && engine->parent.device->componentThread) { engine->parent.device->componentThread->getIoService().post( - engine->collateKernelCb); + std::bind(engine->collateKernelCb, event_command_exec_status)); } } @@ -260,141 +260,74 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel( StagingBuffer& assemblyBuff, uint32_t nSucceeded, compactKernelCbFn callback) { - if (!isSetup) - { - std::cerr << __func__ << ": engine not set up" << std::endl; - return false; - } - - if (isRunning) - { - std::cerr << __func__ << ": already running, call stop() first" - << std::endl; - return false; - } - - // Validate buffers match what we set up - struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); - - if (assemblyIov.iov_base != assemblyBufferPtr - || assemblyIov.iov_len != assemblyBufferSize) - { - throw std::runtime_error( - std::string(__func__) + ": buffer mismatch - buffers have changed"); - } - // Store the caller's callback - compactKernelCb = callback; + compactKernelCb = std::move(callback); - // Set up kernel arguments for slotCompactor - if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) { - return false; - } + // Validate buffers callable + auto validateBuffers = [this, &assemblyBuff]() { + struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); + if (assemblyIov.iov_base != assemblyBufferPtr + || assemblyIov.iov_len != assemblyBufferSize) + { + throw std::runtime_error( + std::string(__func__) + ": buffer mismatch - buffers have changed"); + } + }; - // Enqueue slotCompactor kernel execution (single work item for sequential processing) - size_t globalWorkSize = 1; - cl_int err = clEnqueueNDRangeKernel( - commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr, - 0, nullptr, ¤tKernelEvent); + // Setup args callable + auto setupArgs = [this, &assemblyBuff, nSucceeded]() { + return setupSlotCompactorsArgs(assemblyBuff, nSucceeded); + }; - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " - << err << std::endl; - return false; - } - - // Set up callback using static member function - err = clSetEventCallback( - currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this); - - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to set event callback: " << err - << std::endl; - clReleaseEvent(currentKernelEvent); - currentKernelEvent = nullptr; - return false; - } - - isRunning = true; - // startCompactKernel() is synchronous - it returns immediately after setting up kernel execution - // The callback will be invoked when the kernel completes - return true; + return startKernel( + slotCompactorKernel, + ¤tCompactKernelEvent, + setupArgs, + validateBuffers, + 1, // globalWorkSize + compactKernelEventCallback, + "slotCompactor"); } bool OpenClCollatingAndMeshingEngine::startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, collateKernelCbFn callback) { - if (!isSetup) - { - std::cerr << __func__ << ": engine not set up" << std::endl; - return false; - } - - if (isRunning) - { - std::cerr << __func__ << ": already running, call stop() first" - << std::endl; - return false; - } - - // Validate buffers match what we set up - struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); - struct iovec collationIov = collationBuff.getClEngineIovec(); - - if (assemblyIov.iov_base != assemblyBufferPtr - || assemblyIov.iov_len != assemblyBufferSize - || collationIov.iov_base != collationBufferPtr - || collationIov.iov_len != collationBufferSize) - { - throw std::runtime_error( - std::string(__func__) + ": buffer mismatch - buffers have changed"); - } - // Store the caller's callback - collateKernelCb = callback; + collateKernelCb = std::move(callback); - // Set up kernel arguments for collateDgrams - if (!setupCollateDgramsArgs(assemblyBuff)) { - return false; - } + // Validate buffers callable + auto validateBuffers = [this, &assemblyBuff, &collationBuff]() { + struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); + struct iovec collationIov = collationBuff.getClEngineIovec(); + if (assemblyIov.iov_base != assemblyBufferPtr + || assemblyIov.iov_len != assemblyBufferSize + || collationIov.iov_base != collationBufferPtr + || collationIov.iov_len != collationBufferSize) + { + throw std::runtime_error( + std::string(__func__) + ": buffer mismatch - buffers have changed"); + } + }; - // Enqueue collateDgrams kernel execution - // NDRange is nDgramsPerFrame (one work item per slot) + // Setup args callable + auto setupArgs = [this, &assemblyBuff]() { + return setupCollateDgramsArgs(assemblyBuff); + }; + + // Calculate global work size uint32_t nDgramsPerFrame = static_cast( frameAssemblyDesc->numSlots); - size_t globalWorkSize = nDgramsPerFrame; - cl_int err = clEnqueueNDRangeKernel( - commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr, - 0, nullptr, ¤tKernelEvent); - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: " - << err << std::endl; - return false; - } - - // Set up callback using static member function - err = clSetEventCallback( - currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this); - - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to set event callback: " << err - << std::endl; - clReleaseEvent(currentKernelEvent); - currentKernelEvent = nullptr; - return false; - } - - isRunning = true; - // startCollateKernel() is synchronous - it returns immediately after setting up kernel execution - // The callback will be invoked when the kernel completes - return true; + return startKernel( + collateKernel, + ¤tCollateKernelEvent, + setupArgs, + validateBuffers, + globalWorkSize, + collateKernelEventCallback, + "collateDgrams"); } bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel( @@ -621,14 +554,24 @@ void OpenClCollatingAndMeshingEngine::stop() } // Cancel kernel execution if possible - if (currentKernelEvent) + if (currentCompactKernelEvent) { // Note: OpenCL doesn't have a standard way to cancel kernel execution // We can try to wait for it to complete or just release the event // For now, we'll just wait for it to complete - clWaitForEvents(1, ¤tKernelEvent); - clReleaseEvent(currentKernelEvent); - currentKernelEvent = nullptr; + clWaitForEvents(1, ¤tCompactKernelEvent); + clReleaseEvent(currentCompactKernelEvent); + currentCompactKernelEvent = nullptr; + } + + if (currentCollateKernelEvent) + { + // Note: OpenCL doesn't have a standard way to cancel kernel execution + // We can try to wait for it to complete or just release the event + // For now, we'll just wait for it to complete + clWaitForEvents(1, ¤tCollateKernelEvent); + clReleaseEvent(currentCollateKernelEvent); + currentCollateKernelEvent = nullptr; } isRunning = false; @@ -637,13 +580,13 @@ void OpenClCollatingAndMeshingEngine::stop() void OpenClCollatingAndMeshingEngine::stopCompactKernel() { stop(); - compactKernelCb = [](){}; + compactKernelCb = [](cl_int){}; } void OpenClCollatingAndMeshingEngine::stopCollateKernel() { stop(); - collateKernelCb = [](){}; + collateKernelCb = [](cl_int){}; } } // namespace stim_buff diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index cab3b33..7bd9825 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include #define CL_TARGET_OPENCL_VERSION 300 #include #include "stagingBuffer.h" @@ -36,8 +38,8 @@ public: void finalize(); // Callback function types - typedef std::function compactKernelCbFn; - typedef std::function collateKernelCbFn; + typedef std::function compactKernelCbFn; + typedef std::function collateKernelCbFn; bool startCompactKernel( StagingBuffer& assemblyBuff, uint32_t nSucceeded, @@ -70,7 +72,8 @@ private: // State tracking bool isRunning; - cl_event currentKernelEvent; + cl_event currentCompactKernelEvent; + cl_event currentCollateKernelEvent; // Memory tracking void* assemblyBufferPtr; @@ -99,6 +102,67 @@ private: bool setupSlotCompactorsArgs( StagingBuffer& assemblyBuff, uint32_t nSucceeded); bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff); + + // Unified kernel start function + template + bool startKernel( + cl_kernel kernel, + cl_event* eventPtr, + SetupArgsFn setupArgsFn, + ValidateBuffersFn validateBuffersFn, + size_t globalWorkSize, + void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*), + const char* kernelName) + { + if (!isSetup) + { + std::cerr << __func__ << ": engine not set up" << std::endl; + return false; + } + + if (isRunning) + { + std::cerr << __func__ << ": already running, call stop() first" + << std::endl; + return false; + } + + // Validate buffers + validateBuffersFn(); + + // Set up kernel arguments + if (!setupArgsFn()) { + return false; + } + + // Enqueue kernel execution + cl_int err = clEnqueueNDRangeKernel( + commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, + 0, nullptr, eventPtr); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to enqueue " << kernelName + << " kernel: " << err << std::endl; + return false; + } + + // Set up callback using static member function + err = clSetEventCallback( + *eventPtr, CL_COMPLETE, eventCallback, this); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set event callback: " << err + << std::endl; + clReleaseEvent(*eventPtr); + *eventPtr = nullptr; + return false; + } + + isRunning = true; + return true; + } }; } // namespace stim_buff