diff --git a/stimBuffApis/livoxGen1/collateDgrams.cl b/stimBuffApis/livoxGen1/collateDgrams.cl
index ae85b2f..91e6d49 100644
--- a/stimBuffApis/livoxGen1/collateDgrams.cl
+++ b/stimBuffApis/livoxGen1/collateDgrams.cl
@@ -1,3 +1,18 @@
+// Read a little-endian 32-bit signed integer from a possibly unaligned address.
+inline int readInt32LE(__global const uchar* ptr)
+{
+    // Widen each byte to uint so that placing the top byte into bits 24..31
+    // is plain unsigned arithmetic and never shifts into an int's sign bit.
+    uint b0 = (uint)ptr[0];
+    uint b1 = (uint)ptr[1];
+    uint b2 = (uint)ptr[2];
+    uint b3 = (uint)ptr[3];
+
+    // Assemble little-endian (b0 is the LSB, b3 the MSB), then reinterpret
+    uint value = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+    return as_int(value);
+}
+
 __kernel void collate(
     __global uchar* assembly,
     __global float* collation,
@@ -10,10 +25,7 @@ __kernel void collate(
     uint slotIndex = get_global_id(0);
 
     // Bounds check
-    if (slotIndex >= nDgramsPerFrame)
-    {
-        return;
-    }
+    if (slotIndex >= nDgramsPerFrame) { return; }
 
     // Calculate slot address
     __global uchar* slotStart = assembly + firstSlotOffset
@@ -37,10 +49,10 @@ __kernel void collate(
         {
             __global uchar* pointPtr = pointsArray + (i * 13);
 
-            // Read int coordinates (little-endian)
-            int x_mm = *((__global int*)(pointPtr + 0));
-            int y_mm = *((__global int*)(pointPtr + 4));
-            int z_mm = *((__global int*)(pointPtr + 8));
+            // Read int coordinates (little-endian, unaligned-safe)
+            int x_mm = readInt32LE(pointPtr + 0);
+            int y_mm = readInt32LE(pointPtr + 4);
+            int z_mm = readInt32LE(pointPtr + 8);
             uchar reflectivity = pointPtr[12];
 
             // Convert to PointXYZI (meters, float)
@@ -65,10 +77,10 @@ __kernel void collate(
         {
             __global uchar* pointPtr = pointsArray + (i * 14);
 
-            // Read int coordinates (little-endian)
-            int x_mm = *((__global int*)(pointPtr + 0));
-            int y_mm = *((__global int*)(pointPtr + 4));
-            int z_mm = *((__global int*)(pointPtr + 8));
+            // Read int coordinates (little-endian, unaligned-safe)
+            int x_mm = readInt32LE(pointPtr + 0);
+            int y_mm = readInt32LE(pointPtr + 4);
+            int z_mm = readInt32LE(pointPtr + 8);
             uchar reflectivity = pointPtr[12];
             // tag at offset 13 is ignored
 
@@ -99,9 +111,9 @@ __kernel void collate(
             __global uchar* samplePtr = pointsArray + (i * 28);
 
             // Process first point
-            int x1_mm = *((__global int*)(samplePtr + 0));
-            int y1_mm = *((__global int*)(samplePtr + 4));
-            int z1_mm = *((__global int*)(samplePtr + 8));
+            int x1_mm = readInt32LE(samplePtr + 0);
+            int y1_mm = readInt32LE(samplePtr + 4);
+            int z1_mm = readInt32LE(samplePtr + 8);
             uchar reflectivity1 = samplePtr[12];
             // tag1 at offset 13 is ignored
 
@@ -118,9 +130,9 @@ __kernel void collate(
             ++pointIndex;
 
             // Process second point
-            int x2_mm = *((__global int*)(samplePtr + 14));
-            int y2_mm = *((__global int*)(samplePtr + 18));
-            int z2_mm = *((__global int*)(samplePtr + 22));
+            int x2_mm = readInt32LE(samplePtr + 14);
+            int y2_mm = readInt32LE(samplePtr + 18);
+            int z2_mm = readInt32LE(samplePtr + 22);
             uchar reflectivity2 = samplePtr[26];
             // tag2 at offset 27 is ignored
 
@@ -150,9 +162,9 @@ __kernel void collate(
             __global uchar* samplePtr = pointsArray + (i * 42);
 
             // Process first point
-            int x1_mm = *((__global int*)(samplePtr + 0));
-            int y1_mm = *((__global int*)(samplePtr + 4));
-            int z1_mm = *((__global int*)(samplePtr + 8));
+            int x1_mm = readInt32LE(samplePtr + 0);
+            int y1_mm = readInt32LE(samplePtr + 4);
+            int z1_mm = readInt32LE(samplePtr + 8);
             uchar reflectivity1 = samplePtr[12];
             // tag1 at offset 13 is ignored
 
@@ -169,9 +181,9 @@ __kernel void collate(
             ++pointIndex;
 
             // Process second point
-            int x2_mm = *((__global int*)(samplePtr + 14));
-            int y2_mm = *((__global int*)(samplePtr + 18));
-            int z2_mm = *((__global int*)(samplePtr + 22));
+            int x2_mm = readInt32LE(samplePtr + 14);
+            int y2_mm = readInt32LE(samplePtr + 18);
+            int z2_mm = readInt32LE(samplePtr + 22);
             uchar reflectivity2 = samplePtr[26];
             // tag2 at offset 27 is ignored
 
@@ -188,9 +200,9 @@ __kernel void collate(
             ++pointIndex;
 
             // Process third point
-            int x3_mm = *((__global int*)(samplePtr + 28));
-            int y3_mm = *((__global int*)(samplePtr + 32));
-            int z3_mm = *((__global int*)(samplePtr + 36));
+            int x3_mm = readInt32LE(samplePtr + 28);
+            int y3_mm = readInt32LE(samplePtr + 32);
+            int z3_mm = readInt32LE(samplePtr + 36);
             uchar reflectivity3 = samplePtr[40];
             // tag3 at offset 41 is ignored
 