OClCollMeshEngn: hide StagingBuffer's firstSlotOffset
This commit is contained in:
@@ -119,12 +119,14 @@ public:
|
||||
/** EXPLANATION:
|
||||
* Returns an iovec for OpenCL engine buffer access.
|
||||
* The buffer is mmap()-allocated and suitable for CL_MEM_USE_HOST_PTR.
|
||||
* Returns pointer to first slot (offset by firstSlotOffsetNBytes) and
|
||||
* size from first slot to end of buffer.
|
||||
*/
|
||||
struct iovec getClEngineIovec() const
|
||||
{
|
||||
struct iovec iov;
|
||||
iov.iov_base = buffer.get();
|
||||
iov.iov_len = bufferNBytes;
|
||||
iov.iov_base = buffer.get() + firstSlotOffsetNBytes;
|
||||
iov.iov_len = bufferNBytes - firstSlotOffsetNBytes;
|
||||
return iov;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ __kernel void collate(
|
||||
__global uchar* assembly,
|
||||
__global float* collation,
|
||||
uint slotStride,
|
||||
uint firstSlotOffset,
|
||||
uint nPointsPerSlot,
|
||||
uint nDgramsPerFrame)
|
||||
{
|
||||
@@ -36,8 +35,7 @@ __kernel void collate(
|
||||
if (slotIndex >= nDgramsPerFrame) { return; }
|
||||
|
||||
// Calculate slot address
|
||||
__global uchar* slotStart = assembly + firstSlotOffset
|
||||
+ (slotIndex * slotStride);
|
||||
__global uchar* slotStart = assembly + (slotIndex * slotStride);
|
||||
|
||||
// Read data_type from offset 9 (1 byte)
|
||||
uchar dataType = slotStart[9];
|
||||
|
||||
@@ -476,8 +476,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||
uint32_t numSlots = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||
uint32_t slotSize = static_cast<uint32_t>(frameAssemblyDesc->slotSizeBytes);
|
||||
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
||||
assemblyBuff.firstSlotOffsetNBytes);
|
||||
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
||||
|
||||
// Set kernel arguments for slotCompactor
|
||||
@@ -517,7 +515,7 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||
}
|
||||
|
||||
err = clSetKernelArg(
|
||||
slotCompactorKernel, 4, sizeof(uint32_t), &firstSlotOffset);
|
||||
slotCompactorKernel, 4, sizeof(uint32_t), &nSucceededUint);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -526,16 +524,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(
|
||||
slotCompactorKernel, 5, sizeof(uint32_t), &nSucceededUint);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -544,8 +532,6 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
{
|
||||
// Extract parameters for collateDgrams kernel
|
||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
||||
assemblyBuff.firstSlotOffsetNBytes);
|
||||
|
||||
// Calculate nPointsPerSlot from device return mode
|
||||
if (!parent.device)
|
||||
@@ -585,7 +571,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &firstSlotOffset);
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &nPointsPerSlot);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
||||
@@ -593,7 +579,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
||||
@@ -601,14 +587,6 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ __kernel void slotCompactor(
|
||||
uint numSlots,
|
||||
uint slotStride,
|
||||
uint slotSize,
|
||||
uint firstSlotOffset,
|
||||
uint nSucceeded)
|
||||
{
|
||||
// Sequential processing: single work item processes all slots
|
||||
@@ -22,8 +21,8 @@ __kernel void slotCompactor(
|
||||
// 3. Exit early once we've moved nFailed dummy slots
|
||||
|
||||
DBG_PRINTF("slotCompactor: KERNEL STARTED\n");
|
||||
DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, firstSlotOffset=%u, nSucceeded=%u\n",
|
||||
numSlots, slotStride, slotSize, firstSlotOffset, nSucceeded);
|
||||
DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, nSucceeded=%u\n",
|
||||
numSlots, slotStride, slotSize, nSucceeded);
|
||||
|
||||
uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots
|
||||
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
||||
@@ -53,8 +52,7 @@ __kernel void slotCompactor(
|
||||
}
|
||||
|
||||
// Calculate slot address
|
||||
__global uchar* slotAddr = assembly + firstSlotOffset
|
||||
+ (i * slotStride);
|
||||
__global uchar* slotAddr = assembly + (i * slotStride);
|
||||
|
||||
// Check if slot is dummy: first 4 bytes should all be 0xFF
|
||||
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
||||
@@ -78,8 +76,7 @@ __kernel void slotCompactor(
|
||||
bool foundNonDummy = false;
|
||||
for (int j = (int)rightmostNonDummy; j > (int)i; --j)
|
||||
{
|
||||
__global uchar* checkSlotAddr = assembly + firstSlotOffset
|
||||
+ (j * slotStride);
|
||||
__global uchar* checkSlotAddr = assembly + (j * slotStride);
|
||||
|
||||
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
|
||||
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
|
||||
@@ -99,7 +96,7 @@ __kernel void slotCompactor(
|
||||
if (foundNonDummy)
|
||||
{
|
||||
DBG_PRINTF("slotCompactor: Moving slot from %u to %u\n", rightmostNonDummy, i);
|
||||
__global uchar* srcAddr = assembly + firstSlotOffset
|
||||
__global uchar* srcAddr = assembly
|
||||
+ (rightmostNonDummy * slotStride);
|
||||
|
||||
// Copy slot data (byte-by-byte copy)
|
||||
|
||||
Reference in New Issue
Block a user