From 5ff6a4ee0b209b29a190ae1948ac553312a2941f Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Sat, 8 Nov 2025 12:21:04 -0400 Subject: [PATCH] OClCollMeshEngn: implement start/stop/setup/finalize --- .../openClCollatingAndMeshingEngine.cpp | 324 ++++++++++++++++-- .../openClCollatingAndMeshingEngine.h | 34 +- 2 files changed, 333 insertions(+), 25 deletions(-) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index ea9efc9..58e226f 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -1,8 +1,13 @@ +#include #include #include #include +#include +#include +#include #include "openClCollatingAndMeshingEngine.h" #include "pcloudStimulusBuffer.h" +#include "openClKernels.h" namespace smo { namespace stim_buff { @@ -16,9 +21,15 @@ commandQueue(nullptr), program(nullptr), kernel(nullptr), isSetup(false), -assemblyBuffer(nullptr), -xyzBuffer(nullptr), -intensityBuffer(nullptr) +clAssemblyBuffer(nullptr), +clCollationBuffer(nullptr), +isRunning(false), +currentKernelEvent(nullptr), +memoryPinned(false), +assemblyBufferPtr(nullptr), +assemblyBufferSize(0), +collationBufferPtr(nullptr), +collationBufferSize(0) { } @@ -39,7 +50,8 @@ bool OpenClCollatingAndMeshingEngine::setup() // Get platform cl_uint numPlatforms; err = clGetPlatformIDs(1, &platform, &numPlatforms); - if (err != CL_SUCCESS || numPlatforms == 0) { + if (err != CL_SUCCESS || numPlatforms == 0) + { std::cerr << __func__ << ": failed to get OpenCL platform: " << err << std::endl; return false; @@ -47,7 +59,8 @@ bool OpenClCollatingAndMeshingEngine::setup() // Get device err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr); - if (err != CL_SUCCESS) { + if (err != CL_SUCCESS) + { std::cerr << __func__ << ": failed to get GPU device: " << err << std::endl; return false; @@ -55,7 +68,8 @@ bool OpenClCollatingAndMeshingEngine::setup() // Create context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err); - if (err != CL_SUCCESS || !context) { + if (err != CL_SUCCESS || !context) + { std::cerr << __func__ << ": failed to create OpenCL context: " << err << std::endl; goto cleanup; @@ -64,14 +78,116 @@ bool OpenClCollatingAndMeshingEngine::setup() // Create command queue commandQueue = clCreateCommandQueueWithProperties( context, device, queueProps, &err); - if (err != CL_SUCCESS || !commandQueue) { + + if (err != CL_SUCCESS || !commandQueue) + { std::cerr << __func__ << ": failed to create command queue: " << err << std::endl; goto cleanup; } - // TODO: Create program and kernel - // TODO: Create buffers + // Declare variables early to avoid goto crossing initialization + struct iovec assemblyIov; + struct iovec collationIov; + const char* kernelSource; + size_t kernelSourceLen; + + // Get StagingBuffer memory pointers from parent + assemblyIov = parent.assemblyBuffer.getClEngineIovec(); + collationIov = parent.collationBuffer.getClEngineIovec(); + + assemblyBufferPtr = assemblyIov.iov_base; + assemblyBufferSize = assemblyIov.iov_len; + collationBufferPtr = collationIov.iov_base; + collationBufferSize = collationIov.iov_len; + + // Pin memory pages using mlock() + if (mlock(assemblyBufferPtr, assemblyBufferSize) != 0) + { + std::cerr << __func__ << ": failed to pin assembly buffer memory: " + << strerror(errno) << std::endl; + goto cleanup; + } + + if (mlock(collationBufferPtr, collationBufferSize) != 0) + { + std::cerr << __func__ << ": failed to pin collation buffer memory: " + << strerror(errno) << std::endl; + munlock(assemblyBufferPtr, assemblyBufferSize); + goto cleanup; + } + + memoryPinned = true; + + // Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy + clAssemblyBuffer = clCreateBuffer( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, + assemblyBufferSize, assemblyBufferPtr, + &err); + + if (err != CL_SUCCESS || !clAssemblyBuffer) + { + std::cerr << __func__ << ": failed to create assembly buffer: " + << err << std::endl; + goto cleanup; + } + + clCollationBuffer = clCreateBuffer( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + collationBufferSize, collationBufferPtr, + &err); + + if (err != CL_SUCCESS || !clCollationBuffer) + { + std::cerr << __func__ << ": failed to create collation buffer: " + << err << std::endl; + goto cleanup; + } + + // Create program and kernel from external source + kernelSource = collateKernelStart; + kernelSourceLen = collateKernelNBytes; + program = clCreateProgramWithSource( + context, 1, &kernelSource, &kernelSourceLen, &err); + + if (err != CL_SUCCESS || !program) + { + std::cerr << __func__ << ": failed to create program: " + << err << std::endl; + goto cleanup; + } + + err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to build program: " + << err << std::endl; + + // Print build log if available + size_t logSize = 0; + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, + 0, nullptr, &logSize); + + if (logSize > 0) + { + std::vector log(logSize); + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, + logSize, log.data(), nullptr); + std::cerr << "Build log: " << log.data() << std::endl; + } + + goto cleanup; + } + + kernel = clCreateKernel(program, "collate", &err); + if (err != CL_SUCCESS || !kernel) + { + std::cerr << __func__ << ": failed to create kernel: " + << err << std::endl; + goto cleanup; + } isSetup = true; return true; @@ -83,38 +199,202 @@ cleanup: void OpenClCollatingAndMeshingEngine::finalize() { - if (intensityBuffer) { - clReleaseMemObject(intensityBuffer); - intensityBuffer = nullptr; + // Call stop() first + stop(); + + // Unpin memory pages if they were pinned + if (memoryPinned) + { + if (collationBufferPtr && collationBufferSize > 0) { + munlock(collationBufferPtr, collationBufferSize); + } + if (assemblyBufferPtr && assemblyBufferSize > 0) { + munlock(assemblyBufferPtr, assemblyBufferSize); + } + + memoryPinned = false; } - if (xyzBuffer) { - clReleaseMemObject(xyzBuffer); - xyzBuffer = nullptr; + + // Release OpenCL buffers in reverse order + if (clCollationBuffer) + { + clReleaseMemObject(clCollationBuffer); + clCollationBuffer = nullptr; } - if (assemblyBuffer) { - clReleaseMemObject(assemblyBuffer); - assemblyBuffer = nullptr; + if (clAssemblyBuffer) + { + clReleaseMemObject(clAssemblyBuffer); + clAssemblyBuffer = nullptr; } - if (kernel) { + + // Release kernel + if (kernel) + { clReleaseKernel(kernel); kernel = nullptr; } - if (program) { + + // Release program + if (program) + { clReleaseProgram(program); program = nullptr; } - if (commandQueue) { + + // Release command queue + if (commandQueue) + { clReleaseCommandQueue(commandQueue); commandQueue = nullptr; } - if (context) { + + // Release context + if (context) + { clReleaseContext(context); context = nullptr; } + // Reset state variables device = nullptr; platform = nullptr; isSetup = false; + isRunning = false; + currentKernelEvent = nullptr; + assemblyBufferPtr = nullptr; + assemblyBufferSize = 0; + collationBufferPtr = nullptr; + collationBufferSize = 0; +} + +// Static callback for OpenCL event +void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback( + cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) +{ + OpenClCollatingAndMeshingEngine* engine = + static_cast(user_data); + + if (!engine || !engine->isRunning || !engine->collateFrameReqCb) + { return; } + + // Post to io_service to call callback on the correct thread + if (engine->parent.device && engine->parent.device->componentThread) + { + engine->parent.device->componentThread->getIoService().post( + engine->collateFrameReqCb); + } +} + +void OpenClCollatingAndMeshingEngine::start( + StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, + collateFrameReqCbFn callback) +{ + if (!isSetup) + { + std::cerr << __func__ << ": engine not set up" << std::endl; + return; + } + + if (isRunning) + { + std::cerr << __func__ << ": already running, call stop() first" << std::endl; + return; + } + + // Validate buffers match what we set up + struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); + struct iovec collationIov = collationBuff.getClEngineIovec(); + + if (assemblyIov.iov_base != assemblyBufferPtr + || assemblyIov.iov_len != assemblyBufferSize) + { + std::cerr << __func__ << ": assembly buffer mismatch" << std::endl; + return; + } + + if (collationIov.iov_base != collationBufferPtr + || collationIov.iov_len != collationBufferSize) + { + std::cerr << __func__ << ": collation buffer mismatch" << std::endl; + return; + } + + // Store the caller's callback + collateFrameReqCb = callback; + + // Set kernel arguments + cl_int err; + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 0: " << err << std::endl; + return; + } + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clCollationBuffer); + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl; + return; + } + + // TODO: Set additional kernel arguments as needed (e.g., buffer sizes, metadata) + + // Enqueue kernel execution + size_t globalWorkSize = 1; // TODO: Calculate appropriate work size + err = clEnqueueNDRangeKernel( + commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, + 0, nullptr, ¤tKernelEvent); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to enqueue kernel: " << err << std::endl; + return; + } + + // Set up callback using static member function + // We need to pass 'this' as user_data, but we need a shared_ptr + // For now, we'll use a workaround: store 'this' and use it carefully + // Actually, we should use a different approach - use a shared_ptr wrapper + // But for now, let's use a simpler approach with proper lifetime management + err = clSetEventCallback( + currentKernelEvent, CL_COMPLETE, kernelEventCallback, this); + + if (err != CL_SUCCESS) + { + std::cerr << __func__ << ": failed to set event callback: " << err << std::endl; + clReleaseEvent(currentKernelEvent); + currentKernelEvent = nullptr; + return; + } + + // TODO: Set up timeout timer in continuation class + // For now, timeout handling will be in the CollateFrameReq continuation + + isRunning = true; + // start() is synchronous - it returns immediately after setting up kernel execution + // The callback will be invoked when the kernel completes +} + +void OpenClCollatingAndMeshingEngine::stop() +{ + if (!isRunning) { + return; // No-op if not running + } + + // Cancel kernel execution if possible + if (currentKernelEvent) + { + // Note: OpenCL doesn't have a standard way to cancel kernel execution + // We can try to wait for it to complete or just release the event + // For now, we'll just wait for it to complete + clWaitForEvents(1, ¤tKernelEvent); + clReleaseEvent(currentKernelEvent); + currentKernelEvent = nullptr; + } + + isRunning = false; + collateFrameReqCb = nullptr; } } // namespace stim_buff diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index 9e6bc68..6be96ac 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -1,11 +1,14 @@ #ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H #define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H +#include #include #include #include +#include #define CL_TARGET_OPENCL_VERSION 300 #include +#include "stagingBuffer.h" namespace smo { namespace stim_buff { @@ -31,6 +34,14 @@ public: bool setup(); void finalize(); + // Callback function type for collateFrameReq + typedef std::function collateFrameReqCbFn; + + void start( + StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, + collateFrameReqCbFn callback); + void stop(); + private: PcloudStimulusBuffer& parent; @@ -44,9 +55,26 @@ private: bool isSetup; // OpenCL buffers - cl_mem assemblyBuffer; - cl_mem xyzBuffer; - cl_mem intensityBuffer; + cl_mem clAssemblyBuffer; + cl_mem clCollationBuffer; + + // State tracking + bool isRunning; + cl_event currentKernelEvent; + bool memoryPinned; + + // Memory pinning tracking + void* assemblyBufferPtr; + size_t assemblyBufferSize; + void* collationBufferPtr; + size_t collationBufferSize; + + // Callback storage + collateFrameReqCbFn collateFrameReqCb; + + // Static callback for OpenCL event + static void CL_CALLBACK kernelEventCallback( + cl_event event, cl_int event_command_exec_status, void* user_data); }; } // namespace stim_buff