Files
salmanoff/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h
T
hayodea 322a8137b2 Revert "LivoxGen1: Use syncCancelerForAsyncWork in producer pipeline"
This reverts commit d788810a05.

We're doing this because it's not necessary. We will be porting to
coros soon and we can just use brace-scopes.
2026-05-30 07:18:29 -04:00

296 lines
9.0 KiB
C++

#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#include <boostAsioLinkageFix.h>
#include <cstdint>
#include <cstddef>
#include <memory>
#include <type_traits>
#include <functional>
#include <optional>
#include <iostream>
#include <stdexcept>
#include <chrono>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <spinscale/asynchronousLoop.h>
#include <spinscale/cps/callback.h>
#include <spinscale/spinLock.h>
#include <user/stimulusFrame.h>
#include <user/stagingBuffer.h>
#include <user/frameAssemblyDesc.h>
#include <user/compute.h>
#include <user/senseApiDesc.h>
#include "pcloudAmbienceQualeIfaceApi.h"
#define OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS 1
namespace smo {
namespace stim_buff {
/* One "job" per attached ambience stimbuff: the StimulusFrame to write the
* uint32 passband count into, and the comparator to apply to the per-slot
* averages the collate kernel staged into averageIntensityBuffer. A job is
* only constructed when its corresponding ambience stimbuff is attached.
*/
struct AmbienceProductionDesc
{
std::reference_wrapper<StimulusFrame> frame;
ParamComparator comparator;
};
// Custom deleters for OpenCL handles
struct ClProgramDeleter
{
void operator()(cl_program prog) const
{ if (prog) { clReleaseProgram(prog); } }
};
struct ClKernelDeleter
{
void operator()(cl_kernel kernel) const
{ if (kernel) { clReleaseKernel(kernel); } }
};
struct ClEventDeleter
{
void operator()(cl_event event) const
{ if (event) { clReleaseEvent(event); } }
};
// Type aliases for OpenCL handle unique_ptrs
using ClProgramPtr = std::unique_ptr<
std::remove_pointer_t<cl_program>, ClProgramDeleter>;
using ClKernelPtr = std::unique_ptr<
std::remove_pointer_t<cl_kernel>, ClKernelDeleter>;
using ClEventPtr = std::unique_ptr<
std::remove_pointer_t<cl_event>, ClEventDeleter>;
class PcloudStimulusProducer;
class OpenClCollatingAndMeshingEngine
{
public:
explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent);
~OpenClCollatingAndMeshingEngine();
// Non-copyable, movable
OpenClCollatingAndMeshingEngine(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine& operator=(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine(
OpenClCollatingAndMeshingEngine&&) = default;
OpenClCollatingAndMeshingEngine& operator=(
OpenClCollatingAndMeshingEngine&&) = default;
bool setup();
void finalize();
typedef std::function<void(bool, StimulusFrame&)>
compactCollateAndMeshFrameReqCbFn;
void compactCollateAndMeshFrameReq(
sscl::AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc,
sscl::cps::Callback<compactCollateAndMeshFrameReqCbFn> callback);
private:
// Callback function types
typedef std::function<void(cl_int)> compactKernelCbFn;
typedef std::function<void(cl_int)> collateKernelCbFn;
bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback);
bool startCollateKernel(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
bool anyAmbienceAttached,
collateKernelCbFn callback);
void compactKernelComplete(bool isFinalizing=false);
void collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
bool anyAmbienceAttached,
bool isFinalizing=false);
bool stop();
/* Apply `comparator` to the nSucceeded per-slot averages the collate
* kernel wrote into averageIntensityBuffer, and write the resulting
* uint32 passband count as the single stimspot of `ambienceFrame`.
*/
void produceAmbienceStimulusFrame(
StimulusFrame& ambienceFrame,
const ParamComparator& comparator,
uint32_t nSucceeded);
public:
// Get kernel execution durations in milliseconds
std::chrono::milliseconds getCompactKernelDuration() const;
std::chrono::milliseconds getCollateKernelDuration() const;
private:
PcloudStimulusProducer& parent;
// OpenCL infrastructure (managed by ComputeManager)
std::shared_ptr<smo::compute::ComputeDevice> computeDevice;
ClProgramPtr slotCompactorProgram;
ClProgramPtr collateProgram;
ClKernelPtr slotCompactorKernel;
ClKernelPtr collateKernel;
// OpenCL buffers (managed by ComputeManager)
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clAverageIntensityBufferClBuffer;
// Cached cl_mem handles for the device we're using
cl_mem clAssemblyBuffer;
cl_mem clCollationBuffer;
cl_mem clAverageIntensityBuffer;
// State tracking
sscl::SpinLock shouldAcceptRequestsLock;
bool shouldAcceptRequests;
bool compactIsRunning;
bool collateIsRunning;
cl_event currentCompactKernelEvent;
cl_event currentCollateKernelEvent;
// Memory tracking
void* assemblyBufferPtr;
size_t assemblyBufferSize;
void* collationBufferPtr;
size_t collationBufferSize;
void* averageIntensityBufferPtr;
size_t averageIntensityBufferSize;
// Mapped buffer pointers (for zero-copy synchronization)
void* mappedAssemblyBuffer;
void* mappedCollationBuffer;
void* mappedAverageIntensityBuffer;
// Frame descriptor (cached from setup)
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
// Callback storage
compactKernelCbFn compactKernelCb;
collateKernelCbFn collateKernelCb;
// Timestamp tracking for kernel execution
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
// Static callbacks for OpenCL events
static void CL_CALLBACK compactKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
static void CL_CALLBACK collateKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
// Private helper methods
bool compileAndPrepareKernel(
const std::shared_ptr<smo::compute::ComputeDevice>& computeDevice,
const char* kernelSource, size_t kernelSourceLen,
const char* kernelName, ClProgramPtr& program, ClKernelPtr& kernel);
bool compileAndPrepareKernels(
const std::shared_ptr<smo::compute::ComputeDevice>& computeDevice,
ClProgramPtr& slotCompactorProgram, ClProgramPtr& collateProgram,
ClKernelPtr& slotCompactorKernel, ClKernelPtr& collateKernel);
bool setupSlotCompactorsArgs(
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
bool anyAmbienceAttached);
// Generic buffer mapping/unmapping for zero-copy synchronization
bool mapBuffer(
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr);
bool unmapBuffer(cl_mem buffer, void*& mappedPtr);
// Wrapper functions for specific buffers
bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAssemblyBuffer();
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapCollationBuffer();
bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAverageIntensityBuffer();
// Forward declaration for continuation class
class CompactCollateAndMeshFrameReq;
// Unified kernel start function
template<typename SetupArgsFn, typename ValidateBuffersFn>
bool startKernel(
cl_kernel kernel,
cl_event* eventPtr,
SetupArgsFn setupArgsFn,
ValidateBuffersFn validateBuffersFn,
size_t globalWorkSize,
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
const char* kernelName,
bool& isRunning)
{
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers
validateBuffersFn();
// Set up kernel arguments
if (!setupArgsFn()) {
return false;
}
// Enqueue kernel execution
cl_int err = clEnqueueNDRangeKernel(
computeDevice->commandQueue, kernel,
1, nullptr, &globalWorkSize, nullptr,
0, nullptr, eventPtr);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue " << kernelName
<< " kernel: " << err << std::endl;
return false;
}
// Set up callback using static member function
err = clSetEventCallback(
*eventPtr, CL_COMPLETE, eventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
// Force queue flush to ensure event processing and callback invocation
err = clFlush(computeDevice->commandQueue);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to flush queue: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
isRunning = true;
return true;
}
};
} // namespace stim_buff
} // namespace smo
#endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H