diff --git a/stimBuffApis/livoxGen1/collateDgrams.cl b/stimBuffApis/livoxGen1/collateDgrams.cl
index 67211d4..60e0d32 100644
--- a/stimBuffApis/livoxGen1/collateDgrams.cl
+++ b/stimBuffApis/livoxGen1/collateDgrams.cl
@@ -1,3 +1,11 @@
+// Debug macro: define DEBUG_COLLATE_DGRAMS to enable printf statements
+// (e.g. pass -DDEBUG_COLLATE_DGRAMS as a build option); it is off by default.
+#ifdef DEBUG_COLLATE_DGRAMS
+#define DBG_PRINTF(...) printf(__VA_ARGS__)
+#else
+#define DBG_PRINTF(...)
+#endif
+
 // Helper function to read a little-endian int32 from unaligned memory
 inline int readInt32LE(__global uchar* ptr)
 {
@@ -57,12 +65,21 @@ __kernel void collate(
             int z_mm = readInt32LE(pointPtr + 8);
             uchar reflectivity = pointPtr[12];
 
+            DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
+                slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
             // Convert to PointXYZI (meters, float)
             float x = (float)x_mm / 1000.0f;
             float y = (float)y_mm / 1000.0f;
             float z = (float)z_mm / 1000.0f;
             float intensity = (float)reflectivity;
 
+            // Print intensity if above 5
+            if (intensity > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, i, intensity);
+            }
+
             // Write to collation buffer
             uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
             collation[offset + 0] = x;
@@ -86,12 +103,21 @@ __kernel void collate(
             uchar reflectivity = pointPtr[12];
             // tag at offset 13 is ignored
 
+            DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
+                slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
             // Convert to PointXYZI (meters, float)
             float x = (float)x_mm / 1000.0f;
             float y = (float)y_mm / 1000.0f;
             float z = (float)z_mm / 1000.0f;
             float intensity = (float)reflectivity;
 
+            // Print intensity if above 5
+            if (intensity > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, i, intensity);
+            }
+
             // Write to collation buffer
             uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
             collation[offset + 0] = x;
@@ -124,6 +150,13 @@ __kernel void collate(
             float z1 = (float)z1_mm / 1000.0f;
             float intensity1 = (float)reflectivity1;
 
+            // Print intensity if above 5
+            if (intensity1 > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, pointIndex, intensity1);
+            }
+
             uint offset1 = collationBaseOffset
                 + (pointIndex * FLOATS_PER_POINT);
             collation[offset1 + 0] = x1;
@@ -144,6 +177,13 @@ __kernel void collate(
             float z2 = (float)z2_mm / 1000.0f;
             float intensity2 = (float)reflectivity2;
 
+            // Print intensity if above 5
+            if (intensity2 > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, pointIndex, intensity2);
+            }
+
             uint offset2 = collationBaseOffset
                 + (pointIndex * FLOATS_PER_POINT);
             collation[offset2 + 0] = x2;
@@ -177,6 +217,13 @@ __kernel void collate(
             float z1 = (float)z1_mm / 1000.0f;
             float intensity1 = (float)reflectivity1;
 
+            // Print intensity if above 5
+            if (intensity1 > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, pointIndex, intensity1);
+            }
+
             uint offset1 = collationBaseOffset
                 + (pointIndex * FLOATS_PER_POINT);
             collation[offset1 + 0] = x1;
@@ -197,6 +244,13 @@ __kernel void collate(
             float z2 = (float)z2_mm / 1000.0f;
             float intensity2 = (float)reflectivity2;
 
+            // Print intensity if above 5
+            if (intensity2 > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, pointIndex, intensity2);
+            }
+
             uint offset2 = collationBaseOffset
                 + (pointIndex * FLOATS_PER_POINT);
             collation[offset2 + 0] = x2;
@@ -217,6 +271,13 @@ __kernel void collate(
             float z3 = (float)z3_mm / 1000.0f;
             float intensity3 = (float)reflectivity3;
 
+            // Print intensity if above 5
+            if (intensity3 > 5.0f)
+            {
+                DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
+                    slotIndex, pointIndex, intensity3);
+            }
+
             uint offset3 = collationBaseOffset
                 + (pointIndex * FLOATS_PER_POINT);
             collation[offset3 + 0] = x3;
diff --git a/stimBuffApis/livoxGen1/slotCompactor.cl b/stimBuffApis/livoxGen1/slotCompactor.cl
index 7ad3ba7..8bb6538 100644
--- a/stimBuffApis/livoxGen1/slotCompactor.cl
+++ b/stimBuffApis/livoxGen1/slotCompactor.cl
@@ -1,3 +1,10 @@
+// Debug macro: define DEBUG_SLOT_COMPACTOR to enable printf statements
+#ifdef DEBUG_SLOT_COMPACTOR
+#define DBG_PRINTF(...) printf(__VA_ARGS__)
+#else
+#define DBG_PRINTF(...)
+#endif
+
 __kernel void slotCompactor(
     __global uchar* assembly,
     uint numSlots,
@@ -14,21 +21,36 @@ __kernel void slotCompactor(
     // 2. Exit early once we've seen nSucceeded non-dummy slots
     // 3. Exit early once we've moved nFailed dummy slots
 
+    DBG_PRINTF("slotCompactor: KERNEL STARTED\n");
+    DBG_PRINTF("slotCompactor: numSlots=%u, slotStride=%u, slotSize=%u, firstSlotOffset=%u, nSucceeded=%u\n",
+        numSlots, slotStride, slotSize, firstSlotOffset, nSucceeded);
+
     uint nFailed = numSlots - nSucceeded; // Calculate number of failed slots
     uint nonDummiesSeen = 0; // Track how many non-dummy slots we've seen
     uint dummiesMoved = 0; // Track how many dummy slots we've moved
 
+    DBG_PRINTF("slotCompactor: nFailed=%u\n", nFailed);
+
     // Initialize rightmostNonDummy to start from the end
     // We'll decrement it each time we use it to avoid re-selecting the same slot
     uint rightmostNonDummy = numSlots - 1;
 
     // Process slots from beginning to end
+    DBG_PRINTF("slotCompactor: Starting loop, numSlots=%u\n", numSlots);
     for (uint i = 0; i < numSlots; ++i)
     {
         // Optimization 2: Exit early once we've seen nSucceeded non-dummy slots
-        if (nonDummiesSeen >= nSucceeded) { break; }
+        if (nonDummiesSeen >= nSucceeded) {
+            DBG_PRINTF("slotCompactor: Early exit at i=%u, nonDummiesSeen=%u >= nSucceeded=%u\n",
+                i, nonDummiesSeen, nSucceeded);
+            break;
+        }
         // Optimization 3: Exit early once we've moved nFailed dummy slots
-        if (dummiesMoved >= nFailed) { break; }
+        if (dummiesMoved >= nFailed) {
+            DBG_PRINTF("slotCompactor: Early exit at i=%u, dummiesMoved=%u >= nFailed=%u\n",
+                i, dummiesMoved, nFailed);
+            break;
+        }
 
         // Calculate slot address
         __global uchar* slotAddr = assembly + firstSlotOffset
@@ -38,6 +60,11 @@ __kernel void slotCompactor(
         bool isDummy = (slotAddr[0] == 0xFF) && (slotAddr[1] == 0xFF)
             && (slotAddr[2] == 0xFF) && (slotAddr[3] == 0xFF);
 
+        if (i < 5 || i == numSlots - 1) {
+            DBG_PRINTF("slotCompactor: i=%u, slot[0-3]=0x%02X%02X%02X%02X, isDummy=%d\n",
+                i, slotAddr[0], slotAddr[1], slotAddr[2], slotAddr[3], isDummy ? 1 : 0);
+        }
+
         // Early continue for non-dummy slots (already in the right place)
         if (!isDummy)
         {
@@ -71,6 +98,7 @@ __kernel void slotCompactor(
         // If we found a non-dummy slot to the right, copy it here
         if (foundNonDummy)
         {
+            DBG_PRINTF("slotCompactor: Moving slot from %u to %u\n", rightmostNonDummy, i);
             __global uchar* srcAddr = assembly + firstSlotOffset
                 + (rightmostNonDummy * slotStride);
 
@@ -89,6 +117,13 @@ __kernel void slotCompactor(
 
             ++dummiesMoved;
             ++nonDummiesSeen; // We just moved a non-dummy to this position
+        } else {
+            if (i < 5) {
+                DBG_PRINTF("slotCompactor: i=%u, no non-dummy found to move\n", i);
+            }
         }
     }
+    DBG_PRINTF("slotCompactor: Loop complete, nonDummiesSeen=%u, dummiesMoved=%u\n",
+        nonDummiesSeen, dummiesMoved);
+    DBG_PRINTF("slotCompactor: KERNEL FINISHED\n");
 }