livoxGen1:OpenCL kernels: add debug printfs
This commit is contained in:
@@ -1,3 +1,11 @@
|
|||||||
|
// Debug tracing for the collate kernel.
//
// DBG_PRINTF(...) forwards its arguments to printf() when
// DEBUG_COLLATE_DGRAMS is defined; otherwise it expands to nothing, so the
// arguments are not evaluated at all.
//
// The flag is intentionally NOT defined in this file. The kernel traces the
// decoded coordinates and reflectivity for each point index, so a hard-coded
// define would leave that output enabled in every build. Turn it on per build
// instead, e.g. by adding "-D DEBUG_COLLATE_DGRAMS" to the program build
// options.
//
// The slotCompactor kernel source defines a macro with the same name; drop
// any earlier definition so the two never clash if they share a translation
// unit. (#undef on an undefined name is a no-op.)
#undef DBG_PRINTF
#ifdef DEBUG_COLLATE_DGRAMS
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DBG_PRINTF(...)
#endif
|
||||||
|
|
||||||
// Helper function to read a little-endian int32 from unaligned memory
|
// Helper function to read a little-endian int32 from unaligned memory
|
||||||
inline int readInt32LE(__global uchar* ptr)
|
inline int readInt32LE(__global uchar* ptr)
|
||||||
{
|
{
|
||||||
@@ -57,12 +65,21 @@ __kernel void collate(
|
|||||||
int z_mm = readInt32LE(pointPtr + 8);
|
int z_mm = readInt32LE(pointPtr + 8);
|
||||||
uchar reflectivity = pointPtr[12];
|
uchar reflectivity = pointPtr[12];
|
||||||
|
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
||||||
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
||||||
// Convert to PointXYZI (meters, float)
|
// Convert to PointXYZI (meters, float)
|
||||||
float x = (float)x_mm / 1000.0f;
|
float x = (float)x_mm / 1000.0f;
|
||||||
float y = (float)y_mm / 1000.0f;
|
float y = (float)y_mm / 1000.0f;
|
||||||
float z = (float)z_mm / 1000.0f;
|
float z = (float)z_mm / 1000.0f;
|
||||||
float intensity = (float)reflectivity;
|
float intensity = (float)reflectivity;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, i, intensity);
|
||||||
|
}
|
||||||
|
|
||||||
// Write to collation buffer
|
// Write to collation buffer
|
||||||
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
||||||
collation[offset + 0] = x;
|
collation[offset + 0] = x;
|
||||||
@@ -86,12 +103,21 @@ __kernel void collate(
|
|||||||
uchar reflectivity = pointPtr[12];
|
uchar reflectivity = pointPtr[12];
|
||||||
// tag at offset 13 is ignored
|
// tag at offset 13 is ignored
|
||||||
|
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
||||||
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
||||||
// Convert to PointXYZI (meters, float)
|
// Convert to PointXYZI (meters, float)
|
||||||
float x = (float)x_mm / 1000.0f;
|
float x = (float)x_mm / 1000.0f;
|
||||||
float y = (float)y_mm / 1000.0f;
|
float y = (float)y_mm / 1000.0f;
|
||||||
float z = (float)z_mm / 1000.0f;
|
float z = (float)z_mm / 1000.0f;
|
||||||
float intensity = (float)reflectivity;
|
float intensity = (float)reflectivity;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, i, intensity);
|
||||||
|
}
|
||||||
|
|
||||||
// Write to collation buffer
|
// Write to collation buffer
|
||||||
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
||||||
collation[offset + 0] = x;
|
collation[offset + 0] = x;
|
||||||
@@ -124,6 +150,13 @@ __kernel void collate(
|
|||||||
float z1 = (float)z1_mm / 1000.0f;
|
float z1 = (float)z1_mm / 1000.0f;
|
||||||
float intensity1 = (float)reflectivity1;
|
float intensity1 = (float)reflectivity1;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity1 > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, pointIndex, intensity1);
|
||||||
|
}
|
||||||
|
|
||||||
uint offset1 = collationBaseOffset
|
uint offset1 = collationBaseOffset
|
||||||
+ (pointIndex * FLOATS_PER_POINT);
|
+ (pointIndex * FLOATS_PER_POINT);
|
||||||
collation[offset1 + 0] = x1;
|
collation[offset1 + 0] = x1;
|
||||||
@@ -144,6 +177,13 @@ __kernel void collate(
|
|||||||
float z2 = (float)z2_mm / 1000.0f;
|
float z2 = (float)z2_mm / 1000.0f;
|
||||||
float intensity2 = (float)reflectivity2;
|
float intensity2 = (float)reflectivity2;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity2 > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, pointIndex, intensity2);
|
||||||
|
}
|
||||||
|
|
||||||
uint offset2 = collationBaseOffset
|
uint offset2 = collationBaseOffset
|
||||||
+ (pointIndex * FLOATS_PER_POINT);
|
+ (pointIndex * FLOATS_PER_POINT);
|
||||||
collation[offset2 + 0] = x2;
|
collation[offset2 + 0] = x2;
|
||||||
@@ -177,6 +217,13 @@ __kernel void collate(
|
|||||||
float z1 = (float)z1_mm / 1000.0f;
|
float z1 = (float)z1_mm / 1000.0f;
|
||||||
float intensity1 = (float)reflectivity1;
|
float intensity1 = (float)reflectivity1;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity1 > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, pointIndex, intensity1);
|
||||||
|
}
|
||||||
|
|
||||||
uint offset1 = collationBaseOffset
|
uint offset1 = collationBaseOffset
|
||||||
+ (pointIndex * FLOATS_PER_POINT);
|
+ (pointIndex * FLOATS_PER_POINT);
|
||||||
collation[offset1 + 0] = x1;
|
collation[offset1 + 0] = x1;
|
||||||
@@ -197,6 +244,13 @@ __kernel void collate(
|
|||||||
float z2 = (float)z2_mm / 1000.0f;
|
float z2 = (float)z2_mm / 1000.0f;
|
||||||
float intensity2 = (float)reflectivity2;
|
float intensity2 = (float)reflectivity2;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity2 > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, pointIndex, intensity2);
|
||||||
|
}
|
||||||
|
|
||||||
uint offset2 = collationBaseOffset
|
uint offset2 = collationBaseOffset
|
||||||
+ (pointIndex * FLOATS_PER_POINT);
|
+ (pointIndex * FLOATS_PER_POINT);
|
||||||
collation[offset2 + 0] = x2;
|
collation[offset2 + 0] = x2;
|
||||||
@@ -217,6 +271,13 @@ __kernel void collate(
|
|||||||
float z3 = (float)z3_mm / 1000.0f;
|
float z3 = (float)z3_mm / 1000.0f;
|
||||||
float intensity3 = (float)reflectivity3;
|
float intensity3 = (float)reflectivity3;
|
||||||
|
|
||||||
|
// Print intensity if above 5
|
||||||
|
if (intensity3 > 5.0f)
|
||||||
|
{
|
||||||
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
||||||
|
slotIndex, pointIndex, intensity3);
|
||||||
|
}
|
||||||
|
|
||||||
uint offset3 = collationBaseOffset
|
uint offset3 = collationBaseOffset
|
||||||
+ (pointIndex * FLOATS_PER_POINT);
|
+ (pointIndex * FLOATS_PER_POINT);
|
||||||
collation[offset3 + 0] = x3;
|
collation[offset3 + 0] = x3;
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
// Debug tracing for the slotCompactor kernel.
//
// DBG_PRINTF(...) forwards its arguments to printf() when
// DEBUG_SLOT_COMPACTOR is defined; otherwise it expands to nothing, so the
// arguments are not evaluated at all. Enable it per build, e.g. by adding
// "-D DEBUG_SLOT_COMPACTOR" to the program build options.
//
// The collate kernel source defines a macro with the same name but a
// different enable flag; drop any earlier definition so the two never clash
// if they share a translation unit. (#undef on an undefined name is a no-op.)
#undef DBG_PRINTF
#ifdef DEBUG_SLOT_COMPACTOR
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DBG_PRINTF(...)
#endif
|
||||||
|
|
||||||
__kernel void slotCompactor(
|
__kernel void slotCompactor(
|
||||||
__global uchar* assembly,
|
__global uchar* assembly,
|
||||||
uint numSlots,
|
uint numSlots,
|
||||||
@@ -14,21 +21,36 @@ __kernel void slotCompactor(
|
|||||||
// 2. Exit early once we've seen nSucceeded non-dummy slots
|
// 2. Exit early once we've seen nSucceeded non-dummy slots
|
||||||
// 3. Exit early once we've moved nFailed dummy slots
|
// 3. Exit early once we've moved nFailed dummy slots
|
||||||
|
|
||||||
|
DBG_PRINTF("slotCompactor: KERNEL STARTED\n");
|
||||||
|
DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, firstSlotOffset=%u, nSucceeded=%u\n",
|
||||||
|
numSlots, slotStride, slotSize, firstSlotOffset, nSucceeded);
|
||||||
|
|
||||||
uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots
|
uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots
|
||||||
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
|
||||||
uint dummiesMoved = 0; // Track how many dummy slots we've moved
|
uint dummiesMoved = 0; // Track how many dummy slots we've moved
|
||||||
|
|
||||||
|
DBG_PRINTF("slotCompactor: nFailed=%u\n", nFailed);
|
||||||
|
|
||||||
// Initialize rightmostNonDummy to start from the end
|
// Initialize rightmostNonDummy to start from the end
|
||||||
// We'll decrement it each time we use it to avoid re-selecting the same slot
|
// We'll decrement it each time we use it to avoid re-selecting the same slot
|
||||||
uint rightmostNonDummy = numSlots - 1;
|
uint rightmostNonDummy = numSlots - 1;
|
||||||
|
|
||||||
// Process slots from beginning to end
|
// Process slots from beginning to end
|
||||||
|
DBG_PRINTF("slotCompactor: Starting loop, numSlots=%u\n", numSlots);
|
||||||
for (uint i = 0; i < numSlots; ++i)
|
for (uint i = 0; i < numSlots; ++i)
|
||||||
{
|
{
|
||||||
// Optimization 2: Exit early once we've seen nSucceeded non-dummy slots
|
// Optimization 2: Exit early once we've seen nSucceeded non-dummy slots
|
||||||
if (nonDummiesSeen >= nSucceeded) { break; }
|
if (nonDummiesSeen >= nSucceeded) {
|
||||||
|
DBG_PRINTF("slotCompactor: Early exit at i=%u, nonDummiesSeen=%u >= nSucceeded=%u\n",
|
||||||
|
i, nonDummiesSeen, nSucceeded);
|
||||||
|
break;
|
||||||
|
}
|
||||||
// Optimization 3: Exit early once we've moved nFailed dummy slots
|
// Optimization 3: Exit early once we've moved nFailed dummy slots
|
||||||
if (dummiesMoved >= nFailed) { break; }
|
if (dummiesMoved >= nFailed) {
|
||||||
|
DBG_PRINTF("slotCompactor: Early exit at i=%u, dummiesMoved=%u >= nFailed=%u\n",
|
||||||
|
i, dummiesMoved, nFailed);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate slot address
|
// Calculate slot address
|
||||||
__global uchar* slotAddr = assembly + firstSlotOffset
|
__global uchar* slotAddr = assembly + firstSlotOffset
|
||||||
@@ -38,6 +60,11 @@ __kernel void slotCompactor(
|
|||||||
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
|
||||||
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
|
&& (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
|
||||||
|
|
||||||
|
if (i < 5 || i == numSlots - 1) {
|
||||||
|
DBG_PRINTF("slotCompactor: i=%u, slot[0-3]=0x%02X%02X%02X%02X, isDummy=%d\n",
|
||||||
|
i, slotAddr[0], slotAddr[1], slotAddr[2], slotAddr[3], isDummy ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
// Early continue for non-dummy slots (already in the right place)
|
// Early continue for non-dummy slots (already in the right place)
|
||||||
if (!isDummy)
|
if (!isDummy)
|
||||||
{
|
{
|
||||||
@@ -71,6 +98,7 @@ __kernel void slotCompactor(
|
|||||||
// If we found a non-dummy slot to the right, copy it here
|
// If we found a non-dummy slot to the right, copy it here
|
||||||
if (foundNonDummy)
|
if (foundNonDummy)
|
||||||
{
|
{
|
||||||
|
DBG_PRINTF("slotCompactor: Moving slot from %u to %u\n", rightmostNonDummy, i);
|
||||||
__global uchar* srcAddr = assembly + firstSlotOffset
|
__global uchar* srcAddr = assembly + firstSlotOffset
|
||||||
+ (rightmostNonDummy * slotStride);
|
+ (rightmostNonDummy * slotStride);
|
||||||
|
|
||||||
@@ -89,6 +117,13 @@ __kernel void slotCompactor(
|
|||||||
|
|
||||||
++dummiesMoved;
|
++dummiesMoved;
|
||||||
++nonDummiesSeen; // We just moved a non-dummy to this position
|
++nonDummiesSeen; // We just moved a non-dummy to this position
|
||||||
|
} else {
|
||||||
|
if (i < 5) {
|
||||||
|
DBG_PRINTF("slotCompactor: i=%u, no non-dummy found to move\n", i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
DBG_PRINTF("slotCompactor: Loop complete, nonDummiesSeen=%u, dummiesMoved=%u\n",
|
||||||
|
nonDummiesSeen, dummiesMoved);
|
||||||
|
DBG_PRINTF("slotCompactor: KERNEL FINISHED\n");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user