96e64e24b8
When mapping in the collationBuff we only need to supply CL_MAP_WRITE and not CL_MAP_WRITE_INVALIDATE_REGION since we don't care to preserve the contents of the collation buff as input to the collation kernel.
293 lines
8.7 KiB
Common Lisp
293 lines
8.7 KiB
Common Lisp
// Debug macro: define DEBUG_COLLATE_DGRAMS to enable printf statements
|
|
// #define DEBUG_COLLATE_DGRAMS
|
|
#ifdef DEBUG_COLLATE_DGRAMS
|
|
#define DBG_PRINTF(...) printf(__VA_ARGS__)
|
|
#else
|
|
#define DBG_PRINTF(...)
|
|
#endif
|
|
|
|
// Helper function to read a little-endian int32 from unaligned memory
|
|
inline int readInt32LE(__global uchar* ptr)
|
|
{
|
|
// Read 4 bytes in little-endian order and assemble into int
|
|
// Handle sign extension correctly for signed int
|
|
int b0 = (int)ptr[0];
|
|
int b1 = (int)ptr[1];
|
|
int b2 = (int)ptr[2];
|
|
int b3 = (int)ptr[3];
|
|
|
|
// Assemble little-endian: b0 is LSB, b3 is MSB
|
|
int value = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
|
|
return value;
|
|
}
|
|
|
|
__kernel void collate(
|
|
__global uchar* assembly,
|
|
__global float* collation,
|
|
uint slotStride,
|
|
uint firstSlotOffset,
|
|
uint nPointsPerSlot,
|
|
uint nDgramsPerFrame)
|
|
{
|
|
// Get work item index (slot index)
|
|
uint slotIndex = get_global_id(0);
|
|
|
|
// Bounds check
|
|
if (slotIndex >= nDgramsPerFrame) { return; }
|
|
|
|
// Calculate slot address
|
|
__global uchar* slotStart = assembly + firstSlotOffset
|
|
+ (slotIndex * slotStride);
|
|
|
|
// Read data_type from offset 9 (1 byte)
|
|
uchar dataType = slotStart[9];
|
|
|
|
// Get points array pointer (after 18-byte header)
|
|
__global uchar* pointsArray = slotStart + 18;
|
|
|
|
// Base offset in collation buffer for this slot (in floats)
|
|
// Each PointXYZI is 4 floats (x, y, z, intensity)
|
|
#define FLOATS_PER_POINT 4
|
|
uint collationBaseOffset = slotIndex * nPointsPerSlot * FLOATS_PER_POINT;
|
|
DBG_PRINTF("Running kernel: about to process points in slot.\n");
|
|
|
|
// Process based on data type using nested ifs (outer) with loops (inner)
|
|
if (dataType == 0)
|
|
{
|
|
// Type 0: LivoxRawPoint - 13 bytes per point
|
|
// Structure: int32_t x, y, z (mm), uint8_t reflectivity
|
|
for (uint i = 0; i < nPointsPerSlot; ++i)
|
|
{
|
|
__global uchar* pointPtr = pointsArray + (i * 13);
|
|
|
|
// Read int coordinates (little-endian, unaligned-safe)
|
|
int x_mm = readInt32LE(pointPtr + 0);
|
|
int y_mm = readInt32LE(pointPtr + 4);
|
|
int z_mm = readInt32LE(pointPtr + 8);
|
|
uchar reflectivity = pointPtr[12];
|
|
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
|
// Convert to PointXYZI (meters, float)
|
|
float x = (float)x_mm / 1000.0f;
|
|
float y = (float)y_mm / 1000.0f;
|
|
float z = (float)z_mm / 1000.0f;
|
|
float intensity = (float)reflectivity;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, i, intensity);
|
|
}
|
|
|
|
// Write to collation buffer
|
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
|
collation[offset + 0] = x;
|
|
collation[offset + 1] = y;
|
|
collation[offset + 2] = z;
|
|
collation[offset + 3] = intensity;
|
|
}
|
|
}
|
|
else if (dataType == 2)
|
|
{
|
|
// Type 2: LivoxExtendRawPoint - 14 bytes per point
|
|
// Structure: int32_t x, y, z (mm), uint8_t reflectivity, uint8_t tag (ignored)
|
|
for (uint i = 0; i < nPointsPerSlot; ++i)
|
|
{
|
|
__global uchar* pointPtr = pointsArray + (i * 14);
|
|
|
|
// Read int coordinates (little-endian, unaligned-safe)
|
|
int x_mm = readInt32LE(pointPtr + 0);
|
|
int y_mm = readInt32LE(pointPtr + 4);
|
|
int z_mm = readInt32LE(pointPtr + 8);
|
|
uchar reflectivity = pointPtr[12];
|
|
// tag at offset 13 is ignored
|
|
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
|
// Convert to PointXYZI (meters, float)
|
|
float x = (float)x_mm / 1000.0f;
|
|
float y = (float)y_mm / 1000.0f;
|
|
float z = (float)z_mm / 1000.0f;
|
|
float intensity = (float)reflectivity;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, i, intensity);
|
|
}
|
|
|
|
// Write to collation buffer
|
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
|
collation[offset + 0] = x;
|
|
collation[offset + 1] = y;
|
|
collation[offset + 2] = z;
|
|
collation[offset + 3] = intensity;
|
|
}
|
|
}
|
|
else if (dataType == 4)
|
|
{
|
|
// Type 4: LivoxDualExtendRawPoint - 28 bytes per sample (2 points)
|
|
// Structure: point1 (x1,y1,z1,reflectivity1,tag1), point2 (x2,y2,z2,reflectivity2,tag2)
|
|
// nPointsPerSlot should be 96, but we have 48 samples * 2 points = 96 points
|
|
uint nSamples = nPointsPerSlot / 2;
|
|
uint pointIndex = 0;
|
|
|
|
for (uint i = 0; i < nSamples; ++i)
|
|
{
|
|
__global uchar* samplePtr = pointsArray + (i * 28);
|
|
|
|
// Process first point
|
|
int x1_mm = readInt32LE(samplePtr + 0);
|
|
int y1_mm = readInt32LE(samplePtr + 4);
|
|
int z1_mm = readInt32LE(samplePtr + 8);
|
|
uchar reflectivity1 = samplePtr[12];
|
|
// tag1 at offset 13 is ignored
|
|
|
|
float x1 = (float)x1_mm / 1000.0f;
|
|
float y1 = (float)y1_mm / 1000.0f;
|
|
float z1 = (float)z1_mm / 1000.0f;
|
|
float intensity1 = (float)reflectivity1;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity1 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity1);
|
|
}
|
|
|
|
uint offset1 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset1 + 0] = x1;
|
|
collation[offset1 + 1] = y1;
|
|
collation[offset1 + 2] = z1;
|
|
collation[offset1 + 3] = intensity1;
|
|
++pointIndex;
|
|
|
|
// Process second point
|
|
int x2_mm = readInt32LE(samplePtr + 14);
|
|
int y2_mm = readInt32LE(samplePtr + 18);
|
|
int z2_mm = readInt32LE(samplePtr + 22);
|
|
uchar reflectivity2 = samplePtr[26];
|
|
// tag2 at offset 27 is ignored
|
|
|
|
float x2 = (float)x2_mm / 1000.0f;
|
|
float y2 = (float)y2_mm / 1000.0f;
|
|
float z2 = (float)z2_mm / 1000.0f;
|
|
float intensity2 = (float)reflectivity2;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity2 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity2);
|
|
}
|
|
|
|
uint offset2 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset2 + 0] = x2;
|
|
collation[offset2 + 1] = y2;
|
|
collation[offset2 + 2] = z2;
|
|
collation[offset2 + 3] = intensity2;
|
|
++pointIndex;
|
|
}
|
|
}
|
|
else if (dataType == 7)
|
|
{
|
|
// Type 7: LivoxTripleExtendRawPoint - 42 bytes per sample (3 points)
|
|
// Structure: point1, point2, point3 (each: x,y,z,reflectivity,tag)
|
|
// nPointsPerSlot should be 90, but we have 30 samples * 3 points = 90 points
|
|
uint nSamples = nPointsPerSlot / 3;
|
|
uint pointIndex = 0;
|
|
|
|
for (uint i = 0; i < nSamples; ++i)
|
|
{
|
|
__global uchar* samplePtr = pointsArray + (i * 42);
|
|
|
|
// Process first point
|
|
int x1_mm = readInt32LE(samplePtr + 0);
|
|
int y1_mm = readInt32LE(samplePtr + 4);
|
|
int z1_mm = readInt32LE(samplePtr + 8);
|
|
uchar reflectivity1 = samplePtr[12];
|
|
// tag1 at offset 13 is ignored
|
|
|
|
float x1 = (float)x1_mm / 1000.0f;
|
|
float y1 = (float)y1_mm / 1000.0f;
|
|
float z1 = (float)z1_mm / 1000.0f;
|
|
float intensity1 = (float)reflectivity1;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity1 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity1);
|
|
}
|
|
|
|
uint offset1 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset1 + 0] = x1;
|
|
collation[offset1 + 1] = y1;
|
|
collation[offset1 + 2] = z1;
|
|
collation[offset1 + 3] = intensity1;
|
|
++pointIndex;
|
|
|
|
// Process second point
|
|
int x2_mm = readInt32LE(samplePtr + 14);
|
|
int y2_mm = readInt32LE(samplePtr + 18);
|
|
int z2_mm = readInt32LE(samplePtr + 22);
|
|
uchar reflectivity2 = samplePtr[26];
|
|
// tag2 at offset 27 is ignored
|
|
|
|
float x2 = (float)x2_mm / 1000.0f;
|
|
float y2 = (float)y2_mm / 1000.0f;
|
|
float z2 = (float)z2_mm / 1000.0f;
|
|
float intensity2 = (float)reflectivity2;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity2 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity2);
|
|
}
|
|
|
|
uint offset2 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset2 + 0] = x2;
|
|
collation[offset2 + 1] = y2;
|
|
collation[offset2 + 2] = z2;
|
|
collation[offset2 + 3] = intensity2;
|
|
++pointIndex;
|
|
|
|
// Process third point
|
|
int x3_mm = readInt32LE(samplePtr + 28);
|
|
int y3_mm = readInt32LE(samplePtr + 32);
|
|
int z3_mm = readInt32LE(samplePtr + 36);
|
|
uchar reflectivity3 = samplePtr[40];
|
|
// tag3 at offset 41 is ignored
|
|
|
|
float x3 = (float)x3_mm / 1000.0f;
|
|
float y3 = (float)y3_mm / 1000.0f;
|
|
float z3 = (float)z3_mm / 1000.0f;
|
|
float intensity3 = (float)reflectivity3;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity3 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity3);
|
|
}
|
|
|
|
uint offset3 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset3 + 0] = x3;
|
|
collation[offset3 + 1] = y3;
|
|
collation[offset3 + 2] = z3;
|
|
collation[offset3 + 3] = intensity3;
|
|
++pointIndex;
|
|
}
|
|
}
|
|
// Unsupported data types are silently ignored
|
|
}
|