Files
salmanoff/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h
T
hayodea 313454c426 OClCollMeshEngn: Add bridged delay in finalize()
See the diff of the todo file within this commit for more details.

In short, we do this to prevent the possibility of an in-flight async
contin accessing metadata that we've already destroyed after finalize()
has been called.
2025-11-27 22:26:50 -04:00

237 lines
7.1 KiB
C++

#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#include <boostAsioLinkageFix.h>
#include <cstdint>
#include <cstddef>
#include <memory>
#include <functional>
#include <optional>
#include <iostream>
#include <stdexcept>
#include <chrono>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <asynchronousLoop.h>
#include <callback.h>
#include <spinLock.h>
#include <user/stimulusFrame.h>
#include <user/stagingBuffer.h>
#include <user/frameAssemblyDesc.h>
#include <user/compute.h>
#include <user/senseApiDesc.h>
#define OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS 1
namespace smo {
namespace stim_buff {
class PcloudStimulusProducer;
class OpenClCollatingAndMeshingEngine
{
public:
explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent);
~OpenClCollatingAndMeshingEngine();
// Non-copyable, movable
OpenClCollatingAndMeshingEngine(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine& operator=(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine(
OpenClCollatingAndMeshingEngine&&) = default;
OpenClCollatingAndMeshingEngine& operator=(
OpenClCollatingAndMeshingEngine&&) = default;
bool setup();
void finalize();
typedef std::function<void(bool, StimulusFrame&)>
compactCollateAndMeshFrameReqCbFn;
void compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback);
private:
// Callback function types
typedef std::function<void(cl_int)> compactKernelCbFn;
typedef std::function<void(cl_int)> collateKernelCbFn;
bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback);
bool startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback);
void compactKernelComplete(bool isFinalizing=false);
void collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing=false);
bool stop();
public:
// Get kernel execution durations in milliseconds
std::chrono::milliseconds getCompactKernelDuration() const;
std::chrono::milliseconds getCollateKernelDuration() const;
private:
PcloudStimulusProducer& parent;
// OpenCL infrastructure (managed by ComputeManager)
std::shared_ptr<smo::compute::ComputeDevice> computeDevice;
cl_program slotCompactorProgram;
cl_program collateProgram;
cl_kernel slotCompactorKernel;
cl_kernel collateKernel;
// OpenCL buffers (managed by ComputeManager)
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
// Cached cl_mem handles for the device we're using
cl_mem clAssemblyBuffer;
cl_mem clCollationBuffer;
// State tracking
SpinLock shouldAcceptRequestsLock;
bool shouldAcceptRequests;
bool compactIsRunning;
bool collateIsRunning;
cl_event currentCompactKernelEvent;
cl_event currentCollateKernelEvent;
// Memory tracking
void* assemblyBufferPtr;
size_t assemblyBufferSize;
void* collationBufferPtr;
size_t collationBufferSize;
// Mapped buffer pointers (for zero-copy synchronization)
void* mappedAssemblyBuffer;
void* mappedCollationBuffer;
// Frame descriptor (cached from setup)
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
// Callback storage
compactKernelCbFn compactKernelCb;
collateKernelCbFn collateKernelCb;
// Timestamp tracking for kernel execution
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
// Static callbacks for OpenCL events
static void CL_CALLBACK compactKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
static void CL_CALLBACK collateKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
// Private helper methods
bool compileAndPrepareKernel(
const char* kernelSource, size_t kernelSourceLen,
const char* kernelName, cl_program& program, cl_kernel& kernel);
bool compileAndPrepareKernels();
bool setupSlotCompactorsArgs(
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame);
// Generic buffer mapping/unmapping for zero-copy synchronization
bool mapBuffer(
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr);
bool unmapBuffer(cl_mem buffer, void*& mappedPtr);
// Wrapper functions for specific buffers
bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAssemblyBuffer();
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapCollationBuffer();
// Forward declaration for continuation class
class CompactCollateAndMeshFrameReq;
// Unified kernel start function
template<typename SetupArgsFn, typename ValidateBuffersFn>
bool startKernel(
cl_kernel kernel,
cl_event* eventPtr,
SetupArgsFn setupArgsFn,
ValidateBuffersFn validateBuffersFn,
size_t globalWorkSize,
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
const char* kernelName,
bool& isRunning)
{
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers
validateBuffersFn();
// Set up kernel arguments
if (!setupArgsFn()) {
return false;
}
// Enqueue kernel execution
cl_int err = clEnqueueNDRangeKernel(
computeDevice->commandQueue, kernel,
1, nullptr, &globalWorkSize, nullptr,
0, nullptr, eventPtr);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue " << kernelName
<< " kernel: " << err << std::endl;
return false;
}
// Set up callback using static member function
err = clSetEventCallback(
*eventPtr, CL_COMPLETE, eventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
// Force queue flush to ensure event processing and callback invocation
err = clFlush(computeDevice->commandQueue);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to flush queue: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
isRunning = true;
return true;
}
};
} // namespace stim_buff
} // namespace smo
#endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H