#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H #define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H #include #include #include #include #include #include #include #include #define CL_TARGET_OPENCL_VERSION 120 #include #include #include #include #include #include #include namespace smo { namespace stim_buff { class PcloudStimulusProducer; class OpenClCollatingAndMeshingEngine { public: explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent); ~OpenClCollatingAndMeshingEngine(); // Non-copyable, movable OpenClCollatingAndMeshingEngine( const OpenClCollatingAndMeshingEngine&) = delete; OpenClCollatingAndMeshingEngine& operator=( const OpenClCollatingAndMeshingEngine&) = delete; OpenClCollatingAndMeshingEngine( OpenClCollatingAndMeshingEngine&&) = default; OpenClCollatingAndMeshingEngine& operator=( OpenClCollatingAndMeshingEngine&&) = default; bool setup(); void finalize(); typedef std::function compactCollateAndMeshFrameReqCbFn; void compactCollateAndMeshFrameReq( AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, Callback callback); private: // Callback function types typedef std::function compactKernelCbFn; typedef std::function collateKernelCbFn; bool startCompactKernel( StagingBuffer& assemblyBuff, uint32_t nSucceeded, compactKernelCbFn callback); bool startCollateKernel( StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, collateKernelCbFn callback); void compactKernelComplete(bool isFinalizing=false); void collateKernelComplete(bool isFinalizing=false); bool stop(); public: // Get kernel execution durations in milliseconds std::chrono::milliseconds getCompactKernelDuration() const; std::chrono::milliseconds getCollateKernelDuration() const; private: PcloudStimulusProducer& parent; // OpenCL infrastructure cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue commandQueue; cl_program slotCompactorProgram; cl_program collateProgram; cl_kernel slotCompactorKernel; cl_kernel collateKernel; // OpenCL buffers cl_mem clAssemblyBuffer; cl_mem clCollationBuffer; // State tracking SpinLock shouldAcceptRequestsLock; bool shouldAcceptRequests; bool compactIsRunning; bool collateIsRunning; cl_event currentCompactKernelEvent; cl_event currentCollateKernelEvent; // Memory tracking void* assemblyBufferPtr; size_t assemblyBufferSize; void* collationBufferPtr; size_t collationBufferSize; // Mapped buffer pointers (for zero-copy synchronization) void* mappedAssemblyBuffer; void* mappedCollationBuffer; // Frame descriptor (cached from setup) std::shared_ptr frameAssemblyDesc; // Callback storage compactKernelCbFn compactKernelCb; collateKernelCbFn collateKernelCb; // Timestamp tracking for kernel execution std::chrono::high_resolution_clock::time_point compactKernelStartTime; std::chrono::high_resolution_clock::time_point compactKernelEndTime; std::chrono::high_resolution_clock::time_point collateKernelStartTime; std::chrono::high_resolution_clock::time_point collateKernelEndTime; // Static callbacks for OpenCL events static void CL_CALLBACK compactKernelEventCallback( cl_event event, cl_int event_command_exec_status, void* user_data); static void CL_CALLBACK collateKernelEventCallback( cl_event event, cl_int event_command_exec_status, void* user_data); // Private helper methods bool compileAndPrepareKernel( const char* kernelSource, size_t kernelSourceLen, const char* kernelName, cl_program& program, cl_kernel& kernel); bool compileAndPrepareKernels(); bool setupSlotCompactorsArgs( StagingBuffer& assemblyBuff, uint32_t nSucceeded); bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff); // Generic buffer mapping/unmapping for zero-copy synchronization bool mapBuffer( cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr); bool unmapBuffer(cl_mem buffer, void*& mappedPtr); // Wrapper functions for specific buffers bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ); bool unmapAssemblyBuffer(); bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ); bool unmapCollationBuffer(); // Forward declaration for continuation class class CompactCollateAndMeshFrameReq; // Unified kernel start function template bool startKernel( cl_kernel kernel, cl_event* eventPtr, SetupArgsFn setupArgsFn, ValidateBuffersFn validateBuffersFn, size_t globalWorkSize, void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*), const char* kernelName, bool& isRunning) { if (isRunning) { std::cerr << __func__ << ": already running, call stop() first" << std::endl; return false; } // Validate buffers validateBuffersFn(); // Set up kernel arguments if (!setupArgsFn()) { return false; } // Enqueue kernel execution cl_int err = clEnqueueNDRangeKernel( commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, eventPtr); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to enqueue " << kernelName << " kernel: " << err << std::endl; return false; } // Set up callback using static member function err = clSetEventCallback( *eventPtr, CL_COMPLETE, eventCallback, this); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to set event callback: " << err << std::endl; clReleaseEvent(*eventPtr); *eventPtr = nullptr; return false; } // Force queue flush to ensure event processing and callback invocation err = clFlush(commandQueue); if (err != CL_SUCCESS) { std::cerr << __func__ << ": failed to flush queue: " << err << std::endl; clReleaseEvent(*eventPtr); *eventPtr = nullptr; return false; } isRunning = true; return true; } }; } // namespace stim_buff } // namespace smo #endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H