livoxG1:slotCompactor.cl: mental-validate and refactor

This commit is contained in:
2025-11-09 03:36:58 -04:00
parent a0a5aa49ad
commit 511f1796e8
2 changed files with 56 additions and 44 deletions
+3 -2
View File
@@ -50,10 +50,11 @@ if(ENABLE_STIMBUFFAPI_livoxGen1)
) )
# Set assembler working directory so .incbin can find the .cl file # Set assembler working directory so .incbin can find the .cl file
# Also declare dependency on collateDgrams.cl # Also declare dependency on collateDgrams.cl and slotCompactor.cl
set_source_files_properties(openClKernels.cl.S PROPERTIES set_source_files_properties(openClKernels.cl.S PROPERTIES
COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}" COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}"
OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl" OBJECT_DEPENDS
"${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl;${CMAKE_CURRENT_SOURCE_DIR}/slotCompactor.cl"
) )
target_include_directories(livoxGen1 PUBLIC target_include_directories(livoxGen1 PUBLIC
+53 -42
View File
@@ -18,6 +18,10 @@ __kernel void slotCompactor(
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
uint dummiesMoved = 0; // Track how many dummy slots we've moved uint dummiesMoved = 0; // Track how many dummy slots we've moved
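// Start the backward-scan cursor at the last slot. Every slot to the right of
// this cursor is already known to be a dummy, so each later scan resumes from
// here instead of restarting at the end. (numSlots - 1 wraps when numSlots is 0,
// but the loop below then never executes.)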
uint rightmostNonDummy = numSlots - 1;
// Process slots from beginning to end // Process slots from beginning to end
for (uint i = 0; i < numSlots; ++i) for (uint i = 0; i < numSlots; ++i)
{ {
@@ -34,50 +38,57 @@ __kernel void slotCompactor(
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF) bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF); && (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
if (isDummy) // Early continue for non-dummy slots (already in the right place)
if (!isDummy)
{ {
// Optimization 1: Find rightmost non-dummy slot and copy it here
uint rightmostNonDummy = numSlots;
for (int j = (int)numSlots - 1; j > (int)i; --j)
{
__global uchar* checkSlotAddr = assembly + firstSlotOffset
+ (j * slotStride);
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
&& (checkSlotAddr[3] == 0xFF);
if (!checkIsDummy)
{
rightmostNonDummy = (uint)j;
break;
}
}
// If we found a non-dummy slot to the right, copy it here
if (rightmostNonDummy < numSlots)
{
__global uchar* srcAddr = assembly + firstSlotOffset
+ (rightmostNonDummy * slotStride);
// Copy slot data (byte-by-byte copy)
for (uint j = 0; j < slotSize; ++j) {
slotAddr[j] = srcAddr[j];
}
// Mark the source slot as dummy (move it to the end)
for (uint j = 0; j < 4; ++j) {
srcAddr[j] = 0xFF;
}
++dummiesMoved;
++nonDummiesSeen; // We just moved a non-dummy to this position
}
}
else
{
// Slot is non-dummy - it's already in the right place
++nonDummiesSeen; ++nonDummiesSeen;
continue;
}
// Optimization 1: Find rightmost non-dummy slot starting from where we left off
// Search backwards from rightmostNonDummy until we find a non-dummy slot
// or reach the current position
bool foundNonDummy = false;
for (int j = (int)rightmostNonDummy; j > (int)i; --j)
{
__global uchar* checkSlotAddr = assembly + firstSlotOffset
+ (j * slotStride);
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
&& (checkSlotAddr[3] == 0xFF);
// Early continue for dummy slots
if (checkIsDummy)
{ continue; }
// Found a non-dummy slot
rightmostNonDummy = (uint)j;
foundNonDummy = true;
break;
}
// If we found a non-dummy slot to the right, copy it here
if (foundNonDummy)
{
__global uchar* srcAddr = assembly + firstSlotOffset
+ (rightmostNonDummy * slotStride);
// Copy slot data (byte-by-byte copy)
for (uint j = 0; j < slotSize; ++j) {
slotAddr[j] = srcAddr[j];
}
// Mark the source slot as dummy (move it to the end)
for (uint j = 0; j < 4; ++j) {
srcAddr[j] = 0xFF;
}
// Step the cursor past the slot we just vacated: it now holds the dummy marker,
// so the next backward scan does not need to re-check it
--rightmostNonDummy;
++dummiesMoved;
++nonDummiesSeen; // We just moved a non-dummy to this position
} }
} }
} }