#include #include #include #include #include #include #include #include "openClCollatingAndMeshingEngine.h" #include "pcloudStimulusBuffer.h" #include "openClKernels.h" namespace smo { namespace stim_buff { OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(PcloudStimulusBuffer& parent_) : parent(parent_), platform(nullptr), device(nullptr), context(nullptr), commandQueue(nullptr), program(nullptr), kernel(nullptr), isSetup(false), clAssemblyBuffer(nullptr), clCollationBuffer(nullptr), isRunning(false), currentKernelEvent(nullptr), memoryPinned(false), assemblyBufferPtr(nullptr), assemblyBufferSize(0), collationBufferPtr(nullptr), collationBufferSize(0) { } OpenClCollatingAndMeshingEngine::~OpenClCollatingAndMeshingEngine() { finalize(); } bool OpenClCollatingAndMeshingEngine::setup() { if (isSetup) { return true; } cl_int err; cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; // Get platform cl_uint numPlatforms; err = clGetPlatformIDs(1, &platform, &numPlatforms); if (err != CL_SUCCESS || numPlatforms == 0) { std::cerr << __func__ << ": failed to get OpenCL platform: " << err << std::endl; return false; } // Get device err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to get GPU device: " << err << std::endl; return false; } // Create context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err); if (err != CL_SUCCESS || !context) { std::cerr << __func__ << ": failed to create OpenCL context: " << err << std::endl; goto cleanup; } // Create command queue commandQueue = clCreateCommandQueueWithProperties( context, device, queueProps, &err); if (err != CL_SUCCESS || !commandQueue) { std::cerr << __func__ << ": failed to create command queue: " << err << std::endl; goto cleanup; } // Declare variables early to avoid goto crossing initialization struct iovec assemblyIov; struct iovec collationIov; const char* kernelSource; size_t kernelSourceLen; // Get StagingBuffer memory pointers from parent assemblyIov = parent.assemblyBuffer.getClEngineIovec(); collationIov = parent.collationBuffer.getClEngineIovec(); assemblyBufferPtr = assemblyIov.iov_base; assemblyBufferSize = assemblyIov.iov_len; collationBufferPtr = collationIov.iov_base; collationBufferSize = collationIov.iov_len; // Pin memory pages using mlock() if (mlock(assemblyBufferPtr, assemblyBufferSize) != 0) { std::cerr << __func__ << ": failed to pin assembly buffer memory: " << strerror(errno) << std::endl; goto cleanup; } if (mlock(collationBufferPtr, collationBufferSize) != 0) { std::cerr << __func__ << ": failed to pin collation buffer memory: " << strerror(errno) << std::endl; munlock(assemblyBufferPtr, assemblyBufferSize); goto cleanup; } memoryPinned = true; // Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy clAssemblyBuffer = clCreateBuffer( context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, assemblyBufferSize, assemblyBufferPtr, &err); if (err != CL_SUCCESS || !clAssemblyBuffer) { std::cerr << __func__ << ": failed to create assembly buffer: " << err << std::endl; goto cleanup; } clCollationBuffer = clCreateBuffer( context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, collationBufferSize, collationBufferPtr, &err); if (err != CL_SUCCESS || !clCollationBuffer) { std::cerr << __func__ << ": failed to create collation buffer: " << err << std::endl; goto cleanup; } // Create program and kernel from external source kernelSource = collateKernelStart; kernelSourceLen = collateKernelNBytes; program = clCreateProgramWithSource( context, 1, &kernelSource, &kernelSourceLen, &err); if (err != CL_SUCCESS || !program) { std::cerr << __func__ << ": failed to create program: " << err << std::endl; goto cleanup; } err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to build program: " << err << std::endl; // Print build log if available size_t logSize = 0; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize); if (logSize > 0) { std::vector log(logSize); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr); std::cerr << "Build log: " << log.data() << std::endl; } goto cleanup; } kernel = clCreateKernel(program, "collate", &err); if (err != CL_SUCCESS || !kernel) { std::cerr << __func__ << ": failed to create kernel: " << err << std::endl; goto cleanup; } isSetup = true; return true; cleanup: finalize(); return false; } void OpenClCollatingAndMeshingEngine::finalize() { // Call stop() first stop(); // Unpin memory pages if they were pinned if (memoryPinned) { if (collationBufferPtr && collationBufferSize > 0) { munlock(collationBufferPtr, collationBufferSize); } if (assemblyBufferPtr && assemblyBufferSize > 0) { munlock(assemblyBufferPtr, assemblyBufferSize); } memoryPinned = false; } // Release OpenCL buffers in reverse order if (clCollationBuffer) { clReleaseMemObject(clCollationBuffer); clCollationBuffer = nullptr; } if (clAssemblyBuffer) { clReleaseMemObject(clAssemblyBuffer); clAssemblyBuffer = nullptr; } // Release kernel if (kernel) { clReleaseKernel(kernel); kernel = nullptr; } // Release program if (program) { clReleaseProgram(program); program = nullptr; } // Release command queue if (commandQueue) { clReleaseCommandQueue(commandQueue); commandQueue = nullptr; } // Release context if (context) { clReleaseContext(context); context = nullptr; } // Reset state variables device = nullptr; platform = nullptr; isSetup = false; isRunning = false; currentKernelEvent = nullptr; assemblyBufferPtr = nullptr; assemblyBufferSize = 0; collationBufferPtr = nullptr; collationBufferSize = 0; } // Static callback for OpenCL event void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback( cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) { OpenClCollatingAndMeshingEngine* engine = static_cast(user_data); if (!engine || !engine->isRunning || !engine->collateFrameReqCb) { return; } // Post to io_service to call callback on the correct thread if (engine->parent.device && engine->parent.device->componentThread) { engine->parent.device->componentThread->getIoService().post( engine->collateFrameReqCb); } } void OpenClCollatingAndMeshingEngine::start( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, collateFrameReqCbFn callback) { if (!isSetup) { std::cerr << __func__ << ": engine not set up" << std::endl; return; } if (isRunning) { std::cerr << __func__ << ": already running, call stop() first" << std::endl; return; } // Validate buffers match what we set up struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); struct iovec collationIov = collationBuff.getClEngineIovec(); if (assemblyIov.iov_base != assemblyBufferPtr || assemblyIov.iov_len != assemblyBufferSize) { std::cerr << __func__ << ": assembly buffer mismatch" << std::endl; return; } if (collationIov.iov_base != collationBufferPtr || collationIov.iov_len != collationBufferSize) { std::cerr << __func__ << ": collation buffer mismatch" << std::endl; return; } // Store the caller's callback collateFrameReqCb = callback; // Set kernel arguments cl_int err; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 0: " << err << std::endl; return; } err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clCollationBuffer); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl; return; } // TODO: Set additional kernel arguments as needed (e.g., buffer sizes, metadata) // Enqueue kernel execution size_t globalWorkSize = 1; // TODO: Calculate appropriate work size err = clEnqueueNDRangeKernel( commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, ¤tKernelEvent); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to enqueue kernel: " << err << std::endl; return; } // Set up callback using static member function // We need to pass 'this' as user_data, but we need a shared_ptr // For now, we'll use a workaround: store 'this' and use it carefully // Actually, we should use a different approach - use a shared_ptr wrapper // But for now, let's use a simpler approach with proper lifetime management err = clSetEventCallback( currentKernelEvent, CL_COMPLETE, kernelEventCallback, this); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set event callback: " << err << std::endl; clReleaseEvent(currentKernelEvent); currentKernelEvent = nullptr; return; } // TODO: Set up timeout timer in continuation class // For now, timeout handling will be in the CollateFrameReq continuation isRunning = true; // start() is synchronous - it returns immediately after setting up kernel execution // The callback will be invoked when the kernel completes } void OpenClCollatingAndMeshingEngine::stop() { if (!isRunning) { return; // No-op if not running } // Cancel kernel execution if possible if (currentKernelEvent) { // Note: OpenCL doesn't have a standard way to cancel kernel execution // We can try to wait for it to complete or just release the event // For now, we'll just wait for it to complete clWaitForEvents(1, ¤tKernelEvent); clReleaseEvent(currentKernelEvent); currentKernelEvent = nullptr; } isRunning = false; collateFrameReqCb = nullptr; } } // namespace stim_buff } // namespace smo