// Debug macro: define DEBUG_COLLATE_DGRAMS to enable printf statements // #define DEBUG_COLLATE_DGRAMS #ifdef DEBUG_COLLATE_DGRAMS #define DBG_PRINTF(...) printf(__VA_ARGS__) #else #define DBG_PRINTF(...) #endif // Helper function to read a little-endian int32 from unaligned memory inline int readInt32LE(__global uchar* ptr) { // Read 4 bytes in little-endian order and assemble into int // Handle sign extension correctly for signed int int b0 = (int)ptr[0]; int b1 = (int)ptr[1]; int b2 = (int)ptr[2]; int b3 = (int)ptr[3]; // Assemble little-endian: b0 is LSB, b3 is MSB int value = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); return value; } __kernel void collate( __global uchar* assembly, __global float* collation, __global float* intensityBuffer, __global uint* ambienceBuffer, uint ambienceHighVal, uint slotStride, uint nPointsPerSlot, uint nDgramsPerFrame) { // Get work item index (slot index) uint slotIndex = get_global_id(0); // Bounds check if (slotIndex >= nDgramsPerFrame) { return; } // Calculate slot address __global uchar* slotStart = assembly + (slotIndex * slotStride); // Read data_type from offset 9 (1 byte) uchar dataType = slotStart[9]; // Get points array pointer (after 18-byte header) __global uchar* pointsArray = slotStart + 18; // Base offset in collation buffer for this slot (in floats) // Each PointXYZ is 3 floats (x, y, z) #define FLOATS_PER_POINT 3 uint collationBaseOffset = slotIndex * nPointsPerSlot * FLOATS_PER_POINT; // Base offset in intensity buffer for this slot (in floats) // Each intensity is 1 float uint intensityBaseOffset = slotIndex * nPointsPerSlot; DBG_PRINTF("Running kernel: about to process points in slot.\n"); // Initialize ambience counter for this work item uint ambienceCount = 0; // Process based on data type using nested ifs (outer) with loops (inner) if (dataType == 0) { // Type 0: LivoxRawPoint - 13 bytes per point // Structure: int32_t x, y, z (mm), uint8_t reflectivity for (uint i = 0; i < nPointsPerSlot; ++i) { __global uchar* pointPtr = pointsArray + (i * 13); // Read int coordinates (little-endian, unaligned-safe) int x_mm = readInt32LE(pointPtr + 0); int y_mm = readInt32LE(pointPtr + 4); int z_mm = readInt32LE(pointPtr + 8); uchar reflectivity = pointPtr[12]; DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n", slotIndex, i, x_mm, y_mm, z_mm, reflectivity); // Convert to PointXYZI (meters, float) float x = (float)x_mm / 1000.0f; float y = (float)y_mm / 1000.0f; float z = (float)z_mm / 1000.0f; float intensity = (float)reflectivity; // Print intensity if above 5 if (intensity > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, i, intensity); } // Write XYZ to collation buffer uint offset = collationBaseOffset + (i * FLOATS_PER_POINT); collation[offset + 0] = x; collation[offset + 1] = y; collation[offset + 2] = z; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + i] = intensity; } // Count high intensity values for ambience buffer if (intensity >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer } } else if (dataType == 2) { // Type 2: LivoxExtendRawPoint - 14 bytes per point // Structure: int32_t x, y, z (mm), uint8_t reflectivity, uint8_t tag (ignored) for (uint i = 0; i < nPointsPerSlot; ++i) { __global uchar* pointPtr = pointsArray + (i * 14); // Read int coordinates (little-endian, unaligned-safe) int x_mm = readInt32LE(pointPtr + 0); int y_mm = readInt32LE(pointPtr + 4); int z_mm = readInt32LE(pointPtr + 8); uchar reflectivity = pointPtr[12]; // tag at offset 13 is ignored DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n", slotIndex, i, x_mm, y_mm, z_mm, reflectivity); // Convert to PointXYZI (meters, float) float x = (float)x_mm / 1000.0f; float y = (float)y_mm / 1000.0f; float z = (float)z_mm / 1000.0f; float intensity = (float)reflectivity; // Print intensity if above 5 if (intensity > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, i, intensity); } // Write XYZ to collation buffer uint offset = collationBaseOffset + (i * FLOATS_PER_POINT); collation[offset + 0] = x; collation[offset + 1] = y; collation[offset + 2] = z; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + i] = intensity; } // Count high intensity values for ambience buffer if (intensity >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer } } else if (dataType == 4) { // Type 4: LivoxDualExtendRawPoint - 28 bytes per sample (2 points) // Structure: point1 (x1,y1,z1,reflectivity1,tag1), point2 (x2,y2,z2,reflectivity2,tag2) // nPointsPerSlot should be 96, but we have 48 samples * 2 points = 96 points uint nSamples = nPointsPerSlot / 2; uint pointIndex = 0; for (uint i = 0; i < nSamples; ++i) { __global uchar* samplePtr = pointsArray + (i * 28); // Process first point int x1_mm = readInt32LE(samplePtr + 0); int y1_mm = readInt32LE(samplePtr + 4); int z1_mm = readInt32LE(samplePtr + 8); uchar reflectivity1 = samplePtr[12]; // tag1 at offset 13 is ignored float x1 = (float)x1_mm / 1000.0f; float y1 = (float)y1_mm / 1000.0f; float z1 = (float)z1_mm / 1000.0f; float intensity1 = (float)reflectivity1; // Print intensity if above 5 if (intensity1 > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, pointIndex, intensity1); } uint offset1 = collationBaseOffset + (pointIndex * FLOATS_PER_POINT); collation[offset1 + 0] = x1; collation[offset1 + 1] = y1; collation[offset1 + 2] = z1; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity1; } // Count high intensity values for ambience buffer if (intensity1 >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer ++pointIndex; // Process second point int x2_mm = readInt32LE(samplePtr + 14); int y2_mm = readInt32LE(samplePtr + 18); int z2_mm = readInt32LE(samplePtr + 22); uchar reflectivity2 = samplePtr[26]; // tag2 at offset 27 is ignored float x2 = (float)x2_mm / 1000.0f; float y2 = (float)y2_mm / 1000.0f; float z2 = (float)z2_mm / 1000.0f; float intensity2 = (float)reflectivity2; // Print intensity if above 5 if (intensity2 > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, pointIndex, intensity2); } uint offset2 = collationBaseOffset + (pointIndex * FLOATS_PER_POINT); collation[offset2 + 0] = x2; collation[offset2 + 1] = y2; collation[offset2 + 2] = z2; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity2; } // Count high intensity values for ambience buffer if (intensity2 >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer ++pointIndex; } } else if (dataType == 7) { // Type 7: LivoxTripleExtendRawPoint - 42 bytes per sample (3 points) // Structure: point1, point2, point3 (each: x,y,z,reflectivity,tag) // nPointsPerSlot should be 90, but we have 30 samples * 3 points = 90 points uint nSamples = nPointsPerSlot / 3; uint pointIndex = 0; for (uint i = 0; i < nSamples; ++i) { __global uchar* samplePtr = pointsArray + (i * 42); // Process first point int x1_mm = readInt32LE(samplePtr + 0); int y1_mm = readInt32LE(samplePtr + 4); int z1_mm = readInt32LE(samplePtr + 8); uchar reflectivity1 = samplePtr[12]; // tag1 at offset 13 is ignored float x1 = (float)x1_mm / 1000.0f; float y1 = (float)y1_mm / 1000.0f; float z1 = (float)z1_mm / 1000.0f; float intensity1 = (float)reflectivity1; // Print intensity if above 5 if (intensity1 > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, pointIndex, intensity1); } uint offset1 = collationBaseOffset + (pointIndex * FLOATS_PER_POINT); collation[offset1 + 0] = x1; collation[offset1 + 1] = y1; collation[offset1 + 2] = z1; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity1; } // Count high intensity values for ambience buffer if (intensity1 >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer ++pointIndex; // Process second point int x2_mm = readInt32LE(samplePtr + 14); int y2_mm = readInt32LE(samplePtr + 18); int z2_mm = readInt32LE(samplePtr + 22); uchar reflectivity2 = samplePtr[26]; // tag2 at offset 27 is ignored float x2 = (float)x2_mm / 1000.0f; float y2 = (float)y2_mm / 1000.0f; float z2 = (float)z2_mm / 1000.0f; float intensity2 = (float)reflectivity2; // Print intensity if above 5 if (intensity2 > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, pointIndex, intensity2); } uint offset2 = collationBaseOffset + (pointIndex * FLOATS_PER_POINT); collation[offset2 + 0] = x2; collation[offset2 + 1] = y2; collation[offset2 + 2] = z2; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity2; } // Count high intensity values for ambience buffer if (intensity2 >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer ++pointIndex; // Process third point int x3_mm = readInt32LE(samplePtr + 28); int y3_mm = readInt32LE(samplePtr + 32); int z3_mm = readInt32LE(samplePtr + 36); uchar reflectivity3 = samplePtr[40]; // tag3 at offset 41 is ignored float x3 = (float)x3_mm / 1000.0f; float y3 = (float)y3_mm / 1000.0f; float z3 = (float)z3_mm / 1000.0f; float intensity3 = (float)reflectivity3; // Print intensity if above 5 if (intensity3 > 5.0f) { DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n", slotIndex, pointIndex, intensity3); } uint offset3 = collationBaseOffset + (pointIndex * FLOATS_PER_POINT); collation[offset3 + 0] = x3; collation[offset3 + 1] = y3; collation[offset3 + 2] = z3; // Write intensity conditionally - divert to intensity buffer if attached, else discard if (intensityBuffer != NULL) { intensityBuffer[intensityBaseOffset + pointIndex] = intensity3; } // Count high intensity values for ambience buffer if (intensity3 >= (float)ambienceHighVal) { ++ambienceCount; } // Don't write intensity to collation buffer ++pointIndex; } } // Unsupported data types are silently ignored // Write ambience count for this work item (once at the end) if (ambienceBuffer != NULL) { ambienceBuffer[slotIndex] = ambienceCount; } }