361 lines
11 KiB
Common Lisp
361 lines
11 KiB
Common Lisp
// Debug macro: define DEBUG_COLLATE_DGRAMS to enable printf statements
|
|
// #define DEBUG_COLLATE_DGRAMS
|
|
#ifdef DEBUG_COLLATE_DGRAMS
|
|
#define DBG_PRINTF(...) printf(__VA_ARGS__)
|
|
#else
|
|
#define DBG_PRINTF(...)
|
|
#endif
|
|
|
|
// Helper function to read a little-endian int32 from unaligned memory
|
|
inline int readInt32LE(__global uchar* ptr)
|
|
{
|
|
// Read 4 bytes in little-endian order and assemble into int
|
|
// Handle sign extension correctly for signed int
|
|
int b0 = (int)ptr[0];
|
|
int b1 = (int)ptr[1];
|
|
int b2 = (int)ptr[2];
|
|
int b3 = (int)ptr[3];
|
|
|
|
// Assemble little-endian: b0 is LSB, b3 is MSB
|
|
int value = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
|
|
return value;
|
|
}
|
|
|
|
__kernel void collate(
|
|
__global uchar* assembly,
|
|
__global float* collation,
|
|
__global float* intensityBuffer,
|
|
__global uint* ambienceBuffer,
|
|
uint ambienceHighVal,
|
|
uint slotStride,
|
|
uint nPointsPerSlot,
|
|
uint nDgramsPerFrame)
|
|
{
|
|
// Get work item index (slot index)
|
|
uint slotIndex = get_global_id(0);
|
|
|
|
// Bounds check
|
|
if (slotIndex >= nDgramsPerFrame) { return; }
|
|
|
|
// Calculate slot address
|
|
__global uchar* slotStart = assembly + (slotIndex * slotStride);
|
|
|
|
// Read data_type from offset 9 (1 byte)
|
|
uchar dataType = slotStart[9];
|
|
|
|
// Get points array pointer (after 18-byte header)
|
|
__global uchar* pointsArray = slotStart + 18;
|
|
|
|
// Base offset in collation buffer for this slot (in floats)
|
|
// Each PointXYZ is 3 floats (x, y, z)
|
|
#define FLOATS_PER_POINT 3
|
|
uint collationBaseOffset = slotIndex * nPointsPerSlot * FLOATS_PER_POINT;
|
|
// Base offset in intensity buffer for this slot (in floats)
|
|
// Each intensity is 1 float
|
|
uint intensityBaseOffset = slotIndex * nPointsPerSlot;
|
|
DBG_PRINTF("Running kernel: about to process points in slot.\n");
|
|
|
|
// Initialize ambience counter for this work item
|
|
uint ambienceCount = 0;
|
|
|
|
// Process based on data type using nested ifs (outer) with loops (inner)
|
|
if (dataType == 0)
|
|
{
|
|
// Type 0: LivoxRawPoint - 13 bytes per point
|
|
// Structure: int32_t x, y, z (mm), uint8_t reflectivity
|
|
for (uint i = 0; i < nPointsPerSlot; ++i)
|
|
{
|
|
__global uchar* pointPtr = pointsArray + (i * 13);
|
|
|
|
// Read int coordinates (little-endian, unaligned-safe)
|
|
int x_mm = readInt32LE(pointPtr + 0);
|
|
int y_mm = readInt32LE(pointPtr + 4);
|
|
int z_mm = readInt32LE(pointPtr + 8);
|
|
uchar reflectivity = pointPtr[12];
|
|
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
|
// Convert to PointXYZI (meters, float)
|
|
float x = (float)x_mm / 1000.0f;
|
|
float y = (float)y_mm / 1000.0f;
|
|
float z = (float)z_mm / 1000.0f;
|
|
float intensity = (float)reflectivity;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, i, intensity);
|
|
}
|
|
|
|
// Write XYZ to collation buffer
|
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
|
collation[offset + 0] = x;
|
|
collation[offset + 1] = y;
|
|
collation[offset + 2] = z;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + i] = intensity;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
}
|
|
}
|
|
else if (dataType == 2)
|
|
{
|
|
// Type 2: LivoxExtendRawPoint - 14 bytes per point
|
|
// Structure: int32_t x, y, z (mm), uint8_t reflectivity, uint8_t tag (ignored)
|
|
for (uint i = 0; i < nPointsPerSlot; ++i)
|
|
{
|
|
__global uchar* pointPtr = pointsArray + (i * 14);
|
|
|
|
// Read int coordinates (little-endian, unaligned-safe)
|
|
int x_mm = readInt32LE(pointPtr + 0);
|
|
int y_mm = readInt32LE(pointPtr + 4);
|
|
int z_mm = readInt32LE(pointPtr + 8);
|
|
uchar reflectivity = pointPtr[12];
|
|
// tag at offset 13 is ignored
|
|
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: x_mm=%d, y_mm=%d, z_mm=%d, reflectivity=%d\n",
|
|
slotIndex, i, x_mm, y_mm, z_mm, reflectivity);
|
|
// Convert to PointXYZI (meters, float)
|
|
float x = (float)x_mm / 1000.0f;
|
|
float y = (float)y_mm / 1000.0f;
|
|
float z = (float)z_mm / 1000.0f;
|
|
float intensity = (float)reflectivity;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, i, intensity);
|
|
}
|
|
|
|
// Write XYZ to collation buffer
|
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
|
collation[offset + 0] = x;
|
|
collation[offset + 1] = y;
|
|
collation[offset + 2] = z;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + i] = intensity;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
}
|
|
}
|
|
else if (dataType == 4)
|
|
{
|
|
// Type 4: LivoxDualExtendRawPoint - 28 bytes per sample (2 points)
|
|
// Structure: point1 (x1,y1,z1,reflectivity1,tag1), point2 (x2,y2,z2,reflectivity2,tag2)
|
|
// nPointsPerSlot should be 96, but we have 48 samples * 2 points = 96 points
|
|
uint nSamples = nPointsPerSlot / 2;
|
|
uint pointIndex = 0;
|
|
|
|
for (uint i = 0; i < nSamples; ++i)
|
|
{
|
|
__global uchar* samplePtr = pointsArray + (i * 28);
|
|
|
|
// Process first point
|
|
int x1_mm = readInt32LE(samplePtr + 0);
|
|
int y1_mm = readInt32LE(samplePtr + 4);
|
|
int z1_mm = readInt32LE(samplePtr + 8);
|
|
uchar reflectivity1 = samplePtr[12];
|
|
// tag1 at offset 13 is ignored
|
|
|
|
float x1 = (float)x1_mm / 1000.0f;
|
|
float y1 = (float)y1_mm / 1000.0f;
|
|
float z1 = (float)z1_mm / 1000.0f;
|
|
float intensity1 = (float)reflectivity1;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity1 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity1);
|
|
}
|
|
|
|
uint offset1 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset1 + 0] = x1;
|
|
collation[offset1 + 1] = y1;
|
|
collation[offset1 + 2] = z1;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity1 >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
++pointIndex;
|
|
|
|
// Process second point
|
|
int x2_mm = readInt32LE(samplePtr + 14);
|
|
int y2_mm = readInt32LE(samplePtr + 18);
|
|
int z2_mm = readInt32LE(samplePtr + 22);
|
|
uchar reflectivity2 = samplePtr[26];
|
|
// tag2 at offset 27 is ignored
|
|
|
|
float x2 = (float)x2_mm / 1000.0f;
|
|
float y2 = (float)y2_mm / 1000.0f;
|
|
float z2 = (float)z2_mm / 1000.0f;
|
|
float intensity2 = (float)reflectivity2;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity2 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity2);
|
|
}
|
|
|
|
uint offset2 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset2 + 0] = x2;
|
|
collation[offset2 + 1] = y2;
|
|
collation[offset2 + 2] = z2;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity2 >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
++pointIndex;
|
|
}
|
|
}
|
|
else if (dataType == 7)
|
|
{
|
|
// Type 7: LivoxTripleExtendRawPoint - 42 bytes per sample (3 points)
|
|
// Structure: point1, point2, point3 (each: x,y,z,reflectivity,tag)
|
|
// nPointsPerSlot should be 90, but we have 30 samples * 3 points = 90 points
|
|
uint nSamples = nPointsPerSlot / 3;
|
|
uint pointIndex = 0;
|
|
|
|
for (uint i = 0; i < nSamples; ++i)
|
|
{
|
|
__global uchar* samplePtr = pointsArray + (i * 42);
|
|
|
|
// Process first point
|
|
int x1_mm = readInt32LE(samplePtr + 0);
|
|
int y1_mm = readInt32LE(samplePtr + 4);
|
|
int z1_mm = readInt32LE(samplePtr + 8);
|
|
uchar reflectivity1 = samplePtr[12];
|
|
// tag1 at offset 13 is ignored
|
|
|
|
float x1 = (float)x1_mm / 1000.0f;
|
|
float y1 = (float)y1_mm / 1000.0f;
|
|
float z1 = (float)z1_mm / 1000.0f;
|
|
float intensity1 = (float)reflectivity1;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity1 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity1);
|
|
}
|
|
|
|
uint offset1 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset1 + 0] = x1;
|
|
collation[offset1 + 1] = y1;
|
|
collation[offset1 + 2] = z1;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity1 >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
++pointIndex;
|
|
|
|
// Process second point
|
|
int x2_mm = readInt32LE(samplePtr + 14);
|
|
int y2_mm = readInt32LE(samplePtr + 18);
|
|
int z2_mm = readInt32LE(samplePtr + 22);
|
|
uchar reflectivity2 = samplePtr[26];
|
|
// tag2 at offset 27 is ignored
|
|
|
|
float x2 = (float)x2_mm / 1000.0f;
|
|
float y2 = (float)y2_mm / 1000.0f;
|
|
float z2 = (float)z2_mm / 1000.0f;
|
|
float intensity2 = (float)reflectivity2;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity2 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity2);
|
|
}
|
|
|
|
uint offset2 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset2 + 0] = x2;
|
|
collation[offset2 + 1] = y2;
|
|
collation[offset2 + 2] = z2;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity2 >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
++pointIndex;
|
|
|
|
// Process third point
|
|
int x3_mm = readInt32LE(samplePtr + 28);
|
|
int y3_mm = readInt32LE(samplePtr + 32);
|
|
int z3_mm = readInt32LE(samplePtr + 36);
|
|
uchar reflectivity3 = samplePtr[40];
|
|
// tag3 at offset 41 is ignored
|
|
|
|
float x3 = (float)x3_mm / 1000.0f;
|
|
float y3 = (float)y3_mm / 1000.0f;
|
|
float z3 = (float)z3_mm / 1000.0f;
|
|
float intensity3 = (float)reflectivity3;
|
|
|
|
// Print intensity if above 5
|
|
if (intensity3 > 5.0f)
|
|
{
|
|
DBG_PRINTF("collate[slot=%u,point=%u]: intensity=%.1f\n",
|
|
slotIndex, pointIndex, intensity3);
|
|
}
|
|
|
|
uint offset3 = collationBaseOffset
|
|
+ (pointIndex * FLOATS_PER_POINT);
|
|
collation[offset3 + 0] = x3;
|
|
collation[offset3 + 1] = y3;
|
|
collation[offset3 + 2] = z3;
|
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
|
if (intensityBuffer != NULL) {
|
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity3;
|
|
}
|
|
// Count high intensity values for ambience buffer
|
|
if (intensity3 >= (float)ambienceHighVal) {
|
|
++ambienceCount;
|
|
}
|
|
// Don't write intensity to collation buffer
|
|
++pointIndex;
|
|
}
|
|
}
|
|
// Unsupported data types are silently ignored
|
|
|
|
// Write ambience count for this work item (once at the end)
|
|
if (ambienceBuffer != NULL) {
|
|
ambienceBuffer[slotIndex] = ambienceCount;
|
|
}
|
|
}
|