livoxG1:slotCompactor.cl: mental-validate and refactor
This commit is contained in:
@@ -50,10 +50,11 @@ if(ENABLE_STIMBUFFAPI_livoxGen1)
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Set assembler working directory so .incbin can find the .cl file
|
# Set assembler working directory so .incbin can find the .cl file
|
||||||
# Also declare dependency on collateDgrams.cl
|
# Also declare dependency on collateDgrams.cl and slotCompactor.cl
|
||||||
set_source_files_properties(openClKernels.cl.S PROPERTIES
|
set_source_files_properties(openClKernels.cl.S PROPERTIES
|
||||||
COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}"
|
COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}"
|
||||||
OBJECT_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl"
|
OBJECT_DEPENDS
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/collateDgrams.cl;${CMAKE_CURRENT_SOURCE_DIR}/slotCompactor.cl"
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(livoxGen1 PUBLIC
|
target_include_directories(livoxGen1 PUBLIC
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ __kernel void slotCompactor(
|
|||||||
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
||||||
uint dummiesMoved = 0; // Track how many dummy slots we've moved
|
uint dummiesMoved = 0; // Track how many dummy slots we've moved
|
||||||
|
|
||||||
|
// Initialize rightmostNonDummy to start from the end
|
||||||
|
// We'll decrement it each time we use it to avoid re-selecting the same slot
|
||||||
|
uint rightmostNonDummy = numSlots - 1;
|
||||||
|
|
||||||
// Process slots from beginning to end
|
// Process slots from beginning to end
|
||||||
for (uint i = 0; i < numSlots; ++i)
|
for (uint i = 0; i < numSlots; ++i)
|
||||||
{
|
{
|
||||||
@@ -34,50 +38,57 @@ __kernel void slotCompactor(
|
|||||||
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
||||||
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
|
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
|
||||||
|
|
||||||
if (isDummy)
|
// Early continue for non-dummy slots (already in the right place)
|
||||||
|
if (!isDummy)
|
||||||
{
|
{
|
||||||
// Optimization 1: Find rightmost non-dummy slot and copy it here
|
|
||||||
uint rightmostNonDummy = numSlots;
|
|
||||||
for (int j = (int)numSlots - 1; j > (int)i; --j)
|
|
||||||
{
|
|
||||||
__global uchar* checkSlotAddr = assembly + firstSlotOffset
|
|
||||||
+ (j * slotStride);
|
|
||||||
|
|
||||||
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
|
|
||||||
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
|
|
||||||
&& (checkSlotAddr[3] == 0xFF);
|
|
||||||
|
|
||||||
if (!checkIsDummy)
|
|
||||||
{
|
|
||||||
rightmostNonDummy = (uint)j;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we found a non-dummy slot to the right, copy it here
|
|
||||||
if (rightmostNonDummy < numSlots)
|
|
||||||
{
|
|
||||||
__global uchar* srcAddr = assembly + firstSlotOffset
|
|
||||||
+ (rightmostNonDummy * slotStride);
|
|
||||||
|
|
||||||
// Copy slot data (byte-by-byte copy)
|
|
||||||
for (uint j = 0; j < slotSize; ++j) {
|
|
||||||
slotAddr[j] = srcAddr[j];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mark the source slot as dummy (move it to the end)
|
|
||||||
for (uint j = 0; j < 4; ++j) {
|
|
||||||
srcAddr[j] = 0xFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
++dummiesMoved;
|
|
||||||
++nonDummiesSeen; // We just moved a non-dummy to this position
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Slot is non-dummy - it's already in the right place
|
|
||||||
++nonDummiesSeen;
|
++nonDummiesSeen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optimization 1: Find rightmost non-dummy slot starting from where we left off
|
||||||
|
// Search backwards from rightmostNonDummy until we find a non-dummy slot
|
||||||
|
// or reach the current position
|
||||||
|
bool foundNonDummy = false;
|
||||||
|
for (int j = (int)rightmostNonDummy; j > (int)i; --j)
|
||||||
|
{
|
||||||
|
__global uchar* checkSlotAddr = assembly + firstSlotOffset
|
||||||
|
+ (j * slotStride);
|
||||||
|
|
||||||
|
bool checkIsDummy = (checkSlotAddr[0] == 0xFF)
|
||||||
|
&& (checkSlotAddr[1] == 0xFF) && (checkSlotAddr[2] == 0xFF)
|
||||||
|
&& (checkSlotAddr[3] == 0xFF);
|
||||||
|
|
||||||
|
// Early continue for dummy slots
|
||||||
|
if (checkIsDummy)
|
||||||
|
{ continue; }
|
||||||
|
|
||||||
|
// Found a non-dummy slot
|
||||||
|
rightmostNonDummy = (uint)j;
|
||||||
|
foundNonDummy = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we found a non-dummy slot to the right, copy it here
|
||||||
|
if (foundNonDummy)
|
||||||
|
{
|
||||||
|
__global uchar* srcAddr = assembly + firstSlotOffset
|
||||||
|
+ (rightmostNonDummy * slotStride);
|
||||||
|
|
||||||
|
// Copy slot data (byte-by-byte copy)
|
||||||
|
for (uint j = 0; j < slotSize; ++j) {
|
||||||
|
slotAddr[j] = srcAddr[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the source slot as dummy (move it to the end)
|
||||||
|
for (uint j = 0; j < 4; ++j) {
|
||||||
|
srcAddr[j] = 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decrement rightmostNonDummy to avoid re-selecting the same slot
|
||||||
|
--rightmostNonDummy;
|
||||||
|
|
||||||
|
++dummiesMoved;
|
||||||
|
++nonDummiesSeen; // We just moved a non-dummy to this position
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user