diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 58e226f..5b17c33 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -8,6 +8,7 @@ #include "openClCollatingAndMeshingEngine.h" #include "pcloudStimulusBuffer.h" #include "openClKernels.h" +#include "frameAssemblyDesc.h" namespace smo { namespace stim_buff { @@ -147,8 +148,8 @@ bool OpenClCollatingAndMeshingEngine::setup() } // Create program and kernel from external source - kernelSource = collateKernelStart; - kernelSourceLen = collateKernelNBytes; + kernelSource = slotCompactorKernelStart; + kernelSourceLen = slotCompactorKernelNBytes; program = clCreateProgramWithSource( context, 1, &kernelSource, &kernelSourceLen, &err); @@ -181,7 +182,7 @@ bool OpenClCollatingAndMeshingEngine::setup() goto cleanup; } - kernel = clCreateKernel(program, "collate", &err); + kernel = clCreateKernel(program, "slotCompactor", &err); if (err != CL_SUCCESS || !kernel) { std::cerr << __func__ << ": failed to create kernel: " @@ -287,7 +288,8 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback( void OpenClCollatingAndMeshingEngine::start( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, - collateFrameReqCbFn callback) + collateFrameReqCbFn callback, + uint32_t nSucceeded) { if (!isSetup) { @@ -322,7 +324,22 @@ void OpenClCollatingAndMeshingEngine::start( // Store the caller's callback collateFrameReqCb = callback; - // Set kernel arguments + // Get FrameAssemblyDesc from assembly buffer + std::shared_ptr<FrameAssemblyDesc> frameDesc = + static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff); + if (!frameDesc || frameDesc->slots.empty()) + { + std::cerr << __func__ << ": invalid frame descriptor" << std::endl; + return; + } + + // Extract parameters for slotCompactor kernel + uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots); + uint32_t slotStride = 
static_cast<uint32_t>(assemblyBuff.slotStrideNBytes); + uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes); + uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes); + + // Set kernel arguments for slotCompactor cl_int err; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer); if (err != CL_SUCCESS) @@ -331,17 +348,44 @@ void OpenClCollatingAndMeshingEngine::start( return; } - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clCollationBuffer); + err = clSetKernelArg(kernel, 1, sizeof(uint32_t), &numSlots); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl; return; } - // TODO: Set additional kernel arguments as needed (e.g., buffer sizes, metadata) + err = clSetKernelArg(kernel, 2, sizeof(uint32_t), &slotStride); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl; + return; + } - // Enqueue kernel execution - size_t globalWorkSize = 1; // TODO: Calculate appropriate work size + err = clSetKernelArg(kernel, 3, sizeof(uint32_t), &slotSize); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl; + return; + } + + err = clSetKernelArg(kernel, 4, sizeof(uint32_t), &firstSlotOffset); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl; + return; + } + + uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded); + err = clSetKernelArg(kernel, 5, sizeof(uint32_t), &nSucceededUint); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl; + return; + } + + // Enqueue kernel execution (single work item for sequential processing) + size_t globalWorkSize = 1; err = clEnqueueNDRangeKernel( commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &currentKernelEvent); diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h 
b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index 6be96ac..16ac1e0 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -39,7 +39,8 @@ public: void start( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, - collateFrameReqCbFn callback); + collateFrameReqCbFn callback, + uint32_t nSucceeded); void stop(); private: diff --git a/stimBuffApis/livoxGen1/stagingBuffer.h b/stimBuffApis/livoxGen1/stagingBuffer.h index 543467d..8f5ce93 100644 --- a/stimBuffApis/livoxGen1/stagingBuffer.h +++ b/stimBuffApis/livoxGen1/stagingBuffer.h @@ -163,8 +163,12 @@ private: // Layout/invariants size_t nDgramsPerFrame; + +public: size_t slotStrideNBytes; size_t firstSlotOffsetNBytes; // offset from buffer start to first slot + +private: IOEngineConstraints inputConstraints; // Descriptor (computed once; reused across frames)