livoxG1: slotCompactor.cl: mentally validate and refactor
This commit is contained in:
@@ -50,10 +50,11 @@ if(ENABLE_STIMBUFFAPI_livoxGen1)
|
||||
)
|
||||
|
||||
# Set assembler working directory so .incbin can find the .cl file
|
||||
# Also declare dependency on collateDgrams.cl
|
||||
# Also declare dependency on collateDgrams.cl and slotCompactor.cl
|
||||
set_source_files_properties(openClKernels.cl.S PROPERTIES
|
||||
COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl"
|
||||
OBJECT_DEPENDS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl;${CMAKE_CURRENT_SOURCE_DIR}/slotCompactor.cl"
|
||||
)
|
||||
|
||||
target_include_directories(livoxGen1 PUBLIC
|
||||
|
||||
@@ -18,6 +18,10 @@ __kernel void slotCompactor(
|
||||
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
||||
uint dummiesMoved = 0; // Track how many dummy slots we've moved
|
||||
|
||||
// Initialize rightmostNonDummy to start from the end
|
||||
// We'll decrement it each time we use it to avoid re-selecting the same slot
|
||||
uint rightmostNonDummy = numSlots - 1;
|
||||
|
||||
// Process slots from beginning to end
|
||||
for (uint i = 0; i < numSlots; ++i)
|
||||
{
|
||||
@@ -34,50 +38,57 @@ __kernel void slotCompactor(
|
||||
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
||||
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
|
||||
|
||||
if (isDummy)
|
||||
// Early continue for non-dummy slots (already in the right place)
|
||||
if (!isDummy)
|
||||
{
|
||||
// Optimization 1: Find rightmost non-dummy slot and copy it here
|
||||
uint rightmostNonDummy = numSlots;
|
||||
for (int j = (int)numSlots - 1; j > (int)i; --j)
|
||||
{
|
||||
__global uchar* checkSlotAddr = assembly + firstSlotOffset
|
||||
+ (j * slotStride);
|
||||
|
||||
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
|
||||
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
|
||||
&& (checkSlotAddr[3] == 0xFF);
|
||||
|
||||
if (!checkIsDummy)
|
||||
{
|
||||
rightmostNonDummy = (uint)j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we found a non-dummy slot to the right, copy it here
|
||||
if (rightmostNonDummy < numSlots)
|
||||
{
|
||||
__global uchar* srcAddr = assembly + firstSlotOffset
|
||||
+ (rightmostNonDummy * slotStride);
|
||||
|
||||
// Copy slot data (byte-by-byte copy)
|
||||
for (uint j = 0; j < slotSize; ++j) {
|
||||
slotAddr[j] = srcAddr[j];
|
||||
}
|
||||
|
||||
// Mark the source slot as dummy (move it to the end)
|
||||
for (uint j = 0; j < 4; ++j) {
|
||||
srcAddr[j] = 0xFF;
|
||||
}
|
||||
|
||||
++dummiesMoved;
|
||||
++nonDummiesSeen; // We just moved a non-dummy to this position
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Slot is non-dummy - it's already in the right place
|
||||
++nonDummiesSeen;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Optimization 1: Find rightmost non-dummy slot starting from where we left off
|
||||
// Search backwards from rightmostNonDummy until we find a non-dummy slot
|
||||
// or reach the current position
|
||||
bool foundNonDummy = false;
|
||||
for (int j = (int)rightmostNonDummy; j > (int)i; --j)
|
||||
{
|
||||
__global uchar* checkSlotAddr = assembly + firstSlotOffset
|
||||
+ (j * slotStride);
|
||||
|
||||
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
|
||||
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
|
||||
&& (checkSlotAddr[3] == 0xFF);
|
||||
|
||||
// Early continue for dummy slots
|
||||
if (checkIsDummy)
|
||||
{ continue; }
|
||||
|
||||
// Found a non-dummy slot
|
||||
rightmostNonDummy = (uint)j;
|
||||
foundNonDummy = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we found a non-dummy slot to the right, copy it here
|
||||
if (foundNonDummy)
|
||||
{
|
||||
__global uchar* srcAddr = assembly + firstSlotOffset
|
||||
+ (rightmostNonDummy * slotStride);
|
||||
|
||||
// Copy slot data (byte-by-byte copy)
|
||||
for (uint j = 0; j < slotSize; ++j) {
|
||||
slotAddr[j] = srcAddr[j];
|
||||
}
|
||||
|
||||
// Mark the source slot as dummy by overwriting its first 4 bytes with 0xFF (the dummy effectively moves toward the end)
|
||||
for (uint j = 0; j < 4; ++j) {
|
||||
srcAddr[j] = 0xFF;
|
||||
}
|
||||
|
||||
// Decrement rightmostNonDummy to avoid re-selecting the same slot
|
||||
--rightmostNonDummy;
|
||||
|
||||
++dummiesMoved;
|
||||
++nonDummiesSeen; // We just moved a non-dummy to this position
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user