From e233dc51d6bd15db7668efb172c9ab82e202c023 Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Thu, 20 Nov 2025 01:48:59 -0400 Subject: [PATCH] OClCollMeshEngn: hide StagingBuffer's firstSlotOffset --- include/user/stagingBuffer.h | 6 ++-- stimBuffApis/livoxGen1/collateDgrams.cl | 4 +-- .../openClCollatingAndMeshingEngine.cpp | 28 ++----------------- stimBuffApis/livoxGen1/slotCompactor.cl | 13 ++++----- 4 files changed, 13 insertions(+), 38 deletions(-) diff --git a/include/user/stagingBuffer.h b/include/user/stagingBuffer.h index 81882ec..05622fa 100644 --- a/include/user/stagingBuffer.h +++ b/include/user/stagingBuffer.h @@ -119,12 +119,14 @@ public: /** EXPLANATION: * Returns an iovec for OpenCL engine buffer access. * The buffer is mmap()-allocated and suitable for CL_MEM_USE_HOST_PTR. + * Returns pointer to first slot (offset by firstSlotOffsetNBytes) and + * size from first slot to end of buffer. */ struct iovec getClEngineIovec() const { struct iovec iov; - iov.iov_base = buffer.get(); - iov.iov_len = bufferNBytes; + iov.iov_base = buffer.get() + firstSlotOffsetNBytes; + iov.iov_len = bufferNBytes - firstSlotOffsetNBytes; return iov; } diff --git a/stimBuffApis/livoxGen1/collateDgrams.cl b/stimBuffApis/livoxGen1/collateDgrams.cl index de56fac..3462a17 100644 --- a/stimBuffApis/livoxGen1/collateDgrams.cl +++ b/stimBuffApis/livoxGen1/collateDgrams.cl @@ -25,7 +25,6 @@ __kernel void collate( __global uchar* assembly, __global float* collation, uint slotStride, - uint firstSlotOffset, uint nPointsPerSlot, uint nDgramsPerFrame) { @@ -36,8 +35,7 @@ __kernel void collate( if (slotIndex >= nDgramsPerFrame) { return; } // Calculate slot address - __global uchar* slotStart = assembly + firstSlotOffset - + (slotIndex * slotStride); + __global uchar* slotStart = assembly + (slotIndex * slotStride); // Read data_type from offset 9 (1 byte) uchar dataType = slotStart[9]; diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp 
b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 102b7ed..2057329 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -476,8 +476,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( uint32_t numSlots = static_cast<uint32_t>(frameAssemblyDesc->numSlots); uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes); uint32_t slotSize = static_cast<uint32_t>(frameAssemblyDesc->slotSizeBytes); - uint32_t firstSlotOffset = static_cast<uint32_t>( - assemblyBuff.firstSlotOffsetNBytes); uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded); // Set kernel arguments for slotCompactor @@ -517,7 +515,7 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( } err = clSetKernelArg( - slotCompactorKernel, 4, sizeof(uint32_t), &firstSlotOffset); + slotCompactorKernel, 4, sizeof(uint32_t), &nSucceededUint); if (err != CL_SUCCESS) { @@ -526,16 +524,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs( return false; } - err = clSetKernelArg( - slotCompactorKernel, 5, sizeof(uint32_t), &nSucceededUint); - - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to set kernel arg 5: " << err - << std::endl; - return false; - } - return true; } @@ -544,8 +532,6 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( { // Extract parameters for collateDgrams kernel uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes); - uint32_t firstSlotOffset = static_cast<uint32_t>( - assemblyBuff.firstSlotOffsetNBytes); // Calculate nPointsPerSlot from device return mode if (!parent.device) @@ -585,7 +571,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &firstSlotOffset); + err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &nPointsPerSlot); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 3: " << err @@ -593,7 +579,7 @@ bool 
OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot); + err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nDgramsPerFrame); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 4: " << err @@ -601,14 +587,6 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs( return false; } - err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame); - if (err != CL_SUCCESS) - { - std::cerr << __func__ << ": failed to set kernel arg 5: " << err - << std::endl; - return false; - } - return true; } diff --git a/stimBuffApis/livoxGen1/slotCompactor.cl b/stimBuffApis/livoxGen1/slotCompactor.cl index 8bb6538..abcb909 100644 --- a/stimBuffApis/livoxGen1/slotCompactor.cl +++ b/stimBuffApis/livoxGen1/slotCompactor.cl @@ -10,7 +10,6 @@ __kernel void slotCompactor( uint numSlots, uint slotStride, uint slotSize, - uint firstSlotOffset, uint nSucceeded) { // Sequential processing: single work item processes all slots @@ -22,8 +21,8 @@ __kernel void slotCompactor( // 3. 
Exit early once we've moved nFailed dummy slots DBG_PRINTF("slotCompactor: KERNEL STARTED\n"); - DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, firstSlotOffset=%u, nSucceeded=%u\n", - numSlots, slotStride, slotSize, firstSlotOffset, nSucceeded); + DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, nSucceeded=%u\n", + numSlots, slotStride, slotSize, nSucceeded); uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen @@ -53,8 +52,7 @@ __kernel void slotCompactor( } // Calculate slot address - __global uchar* slotAddr = assembly + firstSlotOffset - + (i * slotStride); + __global uchar* slotAddr = assembly + (i * slotStride); // Check if slot is dummy: first 4 bytes should all be 0xFF bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF) @@ -78,8 +76,7 @@ __kernel void slotCompactor( bool foundNonDummy = false; for (int j = (int)rightmostNonDummy; j > (int)i; --j) { - __global uchar* checkSlotAddr = assembly + firstSlotOffset - + (j * slotStride); + __global uchar* checkSlotAddr = assembly + (j * slotStride); bool checkIsDummy = (checkSlotAddr[0] == 0xFF) && (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF) @@ -99,7 +96,7 @@ __kernel void slotCompactor( if (foundNonDummy) { DBG_PRINTF("slotCompactor: Moving slot from %u to %u\n", rightmostNonDummy, i); - __global uchar* srcAddr = assembly + firstSlotOffset + __global uchar* srcAddr = assembly + (rightmostNonDummy * slotStride); // Copy slot data (byte-by-byte copy)