livoxG1:collateDgrams.cl: Fix unaligned reads

This commit is contained in:
2025-11-09 11:48:53 -04:00
parent 7977f0bcc9
commit 55116b1d41
+39 -27
View File
@@ -1,3 +1,18 @@
// Read a little-endian signed 32-bit integer from global memory without
// requiring any particular alignment of the source address.
//
// ptr: address of the first of four consecutive bytes; ptr[0] is the least
//      significant byte and ptr[3] the most significant.
// Returns the four bytes combined into one int.
inline int readInt32LE(__global uchar* ptr)
{
// Fetch byte-by-byte so no multi-byte load is issued on an unaligned address.
// The bytes are widened to unsigned int: with signed int, shifting a top byte
// >= 0x80 left by 24 would overflow into the sign bit, which is undefined.
unsigned int b0 = ptr[0];
unsigned int b1 = ptr[1];
unsigned int b2 = ptr[2];
unsigned int b3 = ptr[3];
// Assemble little-endian: b0 is LSB, b3 is MSB
unsigned int bits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
// Convert the assembled bit pattern to signed once, at the end.
// NOTE(review): relies on the usual two's-complement wrap for this cast;
// as_int(bits) is the explicit OpenCL reinterpretation if preferred.
return (int)bits;
}
__kernel void collate(
__global uchar* assembly,
__global float* collation,
@@ -10,10 +25,7 @@ __kernel void collate(
uint slotIndex = get_global_id(0);
// Bounds check
if (slotIndex >= nDgramsPerFrame)
{
return;
}
if (slotIndex >= nDgramsPerFrame) { return; }
// Calculate slot address
__global uchar* slotStart = assembly + firstSlotOffset
@@ -37,10 +49,10 @@ __kernel void collate(
{
__global uchar* pointPtr = pointsArray + (i * 13);
// Read int coordinates (little-endian)
int x_mm = *((__global int*)(pointPtr + 0));
int y_mm = *((__global int*)(pointPtr + 4));
int z_mm = *((__global int*)(pointPtr + 8));
// Read int coordinates (little-endian, unaligned-safe)
int x_mm = readInt32LE(pointPtr + 0);
int y_mm = readInt32LE(pointPtr + 4);
int z_mm = readInt32LE(pointPtr + 8);
uchar reflectivity = pointPtr[12];
// Convert to PointXYZI (meters, float)
@@ -65,10 +77,10 @@ __kernel void collate(
{
__global uchar* pointPtr = pointsArray + (i * 14);
// Read int coordinates (little-endian)
int x_mm = *((__global int*)(pointPtr + 0));
int y_mm = *((__global int*)(pointPtr + 4));
int z_mm = *((__global int*)(pointPtr + 8));
// Read int coordinates (little-endian, unaligned-safe)
int x_mm = readInt32LE(pointPtr + 0);
int y_mm = readInt32LE(pointPtr + 4);
int z_mm = readInt32LE(pointPtr + 8);
uchar reflectivity = pointPtr[12];
// tag at offset 13 is ignored
@@ -99,9 +111,9 @@ __kernel void collate(
__global uchar* samplePtr = pointsArray + (i * 28);
// Process first point
int x1_mm = *((__global int*)(samplePtr + 0));
int y1_mm = *((__global int*)(samplePtr + 4));
int z1_mm = *((__global int*)(samplePtr + 8));
int x1_mm = readInt32LE(samplePtr + 0);
int y1_mm = readInt32LE(samplePtr + 4);
int z1_mm = readInt32LE(samplePtr + 8);
uchar reflectivity1 = samplePtr[12];
// tag1 at offset 13 is ignored
@@ -118,9 +130,9 @@ __kernel void collate(
++pointIndex;
// Process second point
int x2_mm = *((__global int*)(samplePtr + 14));
int y2_mm = *((__global int*)(samplePtr + 18));
int z2_mm = *((__global int*)(samplePtr + 22));
int x2_mm = readInt32LE(samplePtr + 14);
int y2_mm = readInt32LE(samplePtr + 18);
int z2_mm = readInt32LE(samplePtr + 22);
uchar reflectivity2 = samplePtr[26];
// tag2 at offset 27 is ignored
@@ -150,9 +162,9 @@ __kernel void collate(
__global uchar* samplePtr = pointsArray + (i * 42);
// Process first point
int x1_mm = *((__global int*)(samplePtr + 0));
int y1_mm = *((__global int*)(samplePtr + 4));
int z1_mm = *((__global int*)(samplePtr + 8));
int x1_mm = readInt32LE(samplePtr + 0);
int y1_mm = readInt32LE(samplePtr + 4);
int z1_mm = readInt32LE(samplePtr + 8);
uchar reflectivity1 = samplePtr[12];
// tag1 at offset 13 is ignored
@@ -169,9 +181,9 @@ __kernel void collate(
++pointIndex;
// Process second point
int x2_mm = *((__global int*)(samplePtr + 14));
int y2_mm = *((__global int*)(samplePtr + 18));
int z2_mm = *((__global int*)(samplePtr + 22));
int x2_mm = readInt32LE(samplePtr + 14);
int y2_mm = readInt32LE(samplePtr + 18);
int z2_mm = readInt32LE(samplePtr + 22);
uchar reflectivity2 = samplePtr[26];
// tag2 at offset 27 is ignored
@@ -188,9 +200,9 @@ __kernel void collate(
++pointIndex;
// Process third point
int x3_mm = *((__global int*)(samplePtr + 28));
int y3_mm = *((__global int*)(samplePtr + 32));
int z3_mm = *((__global int*)(samplePtr + 36));
int x3_mm = readInt32LE(samplePtr + 28);
int y3_mm = readInt32LE(samplePtr + 32);
int z3_mm = readInt32LE(samplePtr + 36);
uchar reflectivity3 = samplePtr[40];
// tag3 at offset 41 is ignored