Files
salmanoff/stimBuffApis/livoxGen1/slotCompactor.cl
T

130 lines
4.2 KiB
Common Lisp

// Debug macro: define DEBUG_SLOT_COMPACTOR to enable printf statements
#ifdef DEBUG_SLOT_COMPACTOR
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DBG_PRINTF(...)
#endif
__kernel void slotCompactor(
__global uchar* assembly,
uint numSlots,
uint slotStride,
uint slotSize,
uint firstSlotOffset,
uint nSucceeded)
{
// Sequential processing: single work item processes all slots
// Compact non-dummy slots to the beginning (lowest addresses)
// Dummy slots will remain at the end (highest addresses)
// Optimizations:
// 1. For each dummy, find rightmost non-dummy and copy it there
// 2. Exit early once we've seen nSucceeded non-dummy slots
// 3. Exit early once we've moved nFailed dummy slots
DBG_PRINTF("slotCompactor: KERNEL STARTED\n");
DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, firstSlotOffset=%u, nSucceeded=%u\n",
numSlots, slotStride, slotSize, firstSlotOffset, nSucceeded);
uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
uint dummiesMoved = 0; // Track how many dummy slots we've moved
DBG_PRINTF("slotCompactor: nFailed=%u\n", nFailed);
// Initialize rightmostNonDummy to start from the end
// We'll decrement it each time we use it to avoid re-selecting the same slot
uint rightmostNonDummy = numSlots - 1;
// Process slots from beginning to end
DBG_PRINTF("slotCompactor: Starting loop, numSlots=%u\n", numSlots);
for (uint i = 0; i < numSlots; ++i)
{
// Optimization 2: Exit early once we've seen nSucceeded non-dummy slots
if (nonDummiesSeen >= nSucceeded) {
DBG_PRINTF("slotCompactor: Early exit at i=%u, nonDummiesSeen=%u >= nSucceeded=%u\n",
i, nonDummiesSeen, nSucceeded);
break;
}
// Optimization 3: Exit early once we've moved nFailed dummy slots
if (dummiesMoved >= nFailed) {
DBG_PRINTF("slotCompactor: Early exit at i=%u, dummiesMoved=%u >= nFailed=%u\n",
i, dummiesMoved, nFailed);
break;
}
// Calculate slot address
__global uchar* slotAddr = assembly + firstSlotOffset
+ (i * slotStride);
// Check if slot is dummy: first 4 bytes should all be 0xFF
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
if (i < 5 || i == numSlots - 1) {
DBG_PRINTF("slotCompactor: i=%u, slot[0-3]=0x%02X%02X%02X%02X, isDummy=%d\n",
i, slotAddr[0], slotAddr[1], slotAddr[2], slotAddr[3], isDummy ? 1 : 0);
}
// Early continue for non-dummy slots (already in the right place)
if (!isDummy)
{
++nonDummiesSeen;
continue;
}
// Optimization 1: Find rightmost non-dummy slot starting from where we left off
// Search backwards from rightmostNonDummy until we find a non-dummy slot
// or reach the current position
bool foundNonDummy = false;
for (int j = (int)rightmostNonDummy; j > (int)i; --j)
{
__global uchar* checkSlotAddr = assembly + firstSlotOffset
+ (j * slotStride);
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
&& (checkSlotAddr[3] == 0xFF);
// Early continue for dummy slots
if (checkIsDummy)
{ continue; }
// Found a non-dummy slot
rightmostNonDummy = (uint)j;
foundNonDummy = true;
break;
}
// If we found a non-dummy slot to the right, copy it here
if (foundNonDummy)
{
DBG_PRINTF("slotCompactor: Moving slot from %u to %u\n", rightmostNonDummy, i);
__global uchar* srcAddr = assembly + firstSlotOffset
+ (rightmostNonDummy * slotStride);
// Copy slot data (byte-by-byte copy)
for (uint j = 0; j < slotSize; ++j) {
slotAddr[j] = srcAddr[j];
}
// Mark the source slot as dummy (move it to the end)
for (uint j = 0; j < 4; ++j) {
srcAddr[j] = 0xFF;
}
// Decrement rightmostNonDummy to avoid re-selecting the same slot
--rightmostNonDummy;
++dummiesMoved;
++nonDummiesSeen; // We just moved a non-dummy to this position
} else {
if (i < 5) {
DBG_PRINTF("slotCompactor: i=%u, no non-dummy found to move\n", i);
}
}
}
DBG_PRINTF("slotCompactor: Loop complete, nonDummiesSeen=%u, dummiesMoved=%u\n",
nonDummiesSeen, dummiesMoved);
DBG_PRINTF("slotCompactor: KERNEL FINISHED\n");
}