From b331af4f0381656d07c1a54ce7e50252e8236079 Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Sun, 9 Nov 2025 16:12:10 -0400 Subject: [PATCH] ClCollMeshEngn: Split start into start[Collate|Compact]Kernel() These prepare each kernel separately. We'll unify them further. --- .../openClCollatingAndMeshingEngine.cpp | 140 ++++++++++++++---- .../openClCollatingAndMeshingEngine.h | 25 +++- 2 files changed, 131 insertions(+), 34 deletions(-) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 6cd1f5b..7ab6161 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -220,28 +220,112 @@ void OpenClCollatingAndMeshingEngine::finalize() frameAssemblyDesc = nullptr; } -// Static callback for OpenCL event -void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback( +// Static callback for compact kernel event +void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) { OpenClCollatingAndMeshingEngine* engine = static_cast(user_data); - if (!engine || !engine->isRunning || !engine->collateFrameReqCb) + if (!engine || !engine->isRunning || !engine->compactKernelCb) { return; } // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) { engine->parent.device->componentThread->getIoService().post( - engine->collateFrameReqCb); + engine->compactKernelCb); } } -bool OpenClCollatingAndMeshingEngine::start( +// Static callback for collate kernel event +void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( + cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) +{ + OpenClCollatingAndMeshingEngine* engine = + static_cast(user_data); + + if (!engine || !engine->isRunning || !engine->collateKernelCb) + { return; } + + // Post to io_service to call callback on the correct thread + if (engine->parent.device && engine->parent.device->componentThread) + { + engine->parent.device->componentThread->getIoService().post( + engine->collateKernelCb); + } +} + +bool OpenClCollatingAndMeshingEngine::startCompactKernel( + StagingBuffer& assemblyBuff, uint32_t nSucceeded, + compactKernelCbFn callback) +{ + if (!isSetup) + { + std::cerr << __func__ << ": engine not set up" << std::endl; + return false; + } + + if (isRunning) + { + std::cerr << __func__ << ": already running, call stop() first" + << std::endl; + return false; + } + + // Validate buffers match what we set up + struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); + + if (assemblyIov.iov_base != assemblyBufferPtr + || assemblyIov.iov_len != assemblyBufferSize) + { + throw std::runtime_error( + std::string(__func__) + ": buffer mismatch - buffers have changed"); + } + + // Store the caller's callback + compactKernelCb = callback; + + // Set up kernel arguments for slotCompactor + if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) { + return false; + } + + // Enqueue slotCompactor kernel execution (single work item for sequential processing) + size_t globalWorkSize = 1; + cl_int err = clEnqueueNDRangeKernel( + commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr, + 0, nullptr, ¤tKernelEvent); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " + << err << std::endl; + return false; + } + + // Set up callback using static member function + err = clSetEventCallback( + currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set event callback: " << err + << std::endl; + clReleaseEvent(currentKernelEvent); + currentKernelEvent = nullptr; + return false; + } + + isRunning = true; + // startCompactKernel() is synchronous - it returns immediately after setting up kernel execution + // The callback will be invoked when the kernel completes + return true; +} + +bool OpenClCollatingAndMeshingEngine::startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, - collateFrameReqCbFn callback, - uint32_t nSucceeded) + collateKernelCbFn callback) { if (!isSetup) { @@ -270,37 +354,33 @@ bool OpenClCollatingAndMeshingEngine::start( } // Store the caller's callback - collateFrameReqCb = callback; + collateKernelCb = callback; - // Set up kernel arguments for slotCompactor - if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) { - return false; - } // Set up kernel arguments for collateDgrams if (!setupCollateDgramsArgs(assemblyBuff)) { return false; } - // Enqueue slotCompactor kernel execution (single work item for sequential processing) - size_t globalWorkSize = 1; + // Enqueue collateDgrams kernel execution + // NDRange is nDgramsPerFrame (one work item per slot) + uint32_t nDgramsPerFrame = static_cast( + frameAssemblyDesc->numSlots); + + size_t globalWorkSize = nDgramsPerFrame; cl_int err = clEnqueueNDRangeKernel( - commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr, + commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, ¤tKernelEvent); if (err != CL_SUCCESS) { - std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " + std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: " << err << std::endl; return false; } // Set up callback using static member function - // We need to pass 'this' as user_data, but we need a shared_ptr - // For now, we'll use a workaround: store 'this' and use it carefully - // Actually, we should use a different approach - use a shared_ptr wrapper - // But for now, let's use a simpler approach with proper lifetime management err = clSetEventCallback( - currentKernelEvent, CL_COMPLETE, kernelEventCallback, this); + currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this); if (err != CL_SUCCESS) { @@ -311,11 +391,8 @@ bool OpenClCollatingAndMeshingEngine::start( return false; } - // TODO: Set up timeout timer in continuation class - // For now, timeout handling will be in the CollateFrameReq continuation - isRunning = true; - // start() is synchronous - it returns immediately after setting up kernel execution + // startCollateKernel() is synchronous - it returns immediately after setting up kernel execution // The callback will be invoked when the kernel completes return true; } @@ -555,7 +632,18 @@ void OpenClCollatingAndMeshingEngine::stop() } isRunning = false; - collateFrameReqCb = nullptr; +} + +void OpenClCollatingAndMeshingEngine::stopCompactKernel() +{ + stop(); + compactKernelCb = [](){}; +} + +void OpenClCollatingAndMeshingEngine::stopCollateKernel() +{ + stop(); + collateKernelCb = [](){}; } } // namespace stim_buff diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index 4648b60..cab3b33 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -35,13 +35,19 @@ public: bool setup(); void finalize(); - // Callback function type for collateFrameReq - typedef std::function collateFrameReqCbFn; + // Callback function types + typedef std::function compactKernelCbFn; + typedef std::function collateKernelCbFn; - bool start( + bool startCompactKernel( + StagingBuffer& assemblyBuff, uint32_t nSucceeded, + compactKernelCbFn callback); + bool startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, - collateFrameReqCbFn callback, - uint32_t nSucceeded); + collateKernelCbFn callback); + + void stopCompactKernel(); + void stopCollateKernel(); void stop(); private: @@ -76,10 +82,13 @@ private: std::shared_ptr frameAssemblyDesc; // Callback storage - collateFrameReqCbFn collateFrameReqCb; + compactKernelCbFn compactKernelCb; + collateKernelCbFn collateKernelCb; - // Static callback for OpenCL event - static void CL_CALLBACK kernelEventCallback( + // Static callbacks for OpenCL events + static void CL_CALLBACK compactKernelEventCallback( + cl_event event, cl_int event_command_exec_status, void* user_data); + static void CL_CALLBACK collateKernelEventCallback( cl_event event, cl_int event_command_exec_status, void* user_data); // Private helper methods