Files
salmanoff/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h
T
hayodea 280b6f7d1c OClCollMeshEngn: Produce ambience count; set postrin threshold
We modify the semantics/meaning of the ambience stim feature.
It now represents the number of frames whose average intensity
is below the ambienceLowVal.

We can now implement the postrin as the event wherein the number
of frames whose intensity <= ambienceLowVal exceeds
postrin-interest-threshold.
2025-11-28 02:55:24 -04:00

247 lines
7.5 KiB
C++

#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
#include <boostAsioLinkageFix.h>
#include <cstdint>
#include <cstddef>
#include <memory>
#include <functional>
#include <optional>
#include <iostream>
#include <stdexcept>
#include <chrono>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <asynchronousLoop.h>
#include <callback.h>
#include <spinLock.h>
#include <user/stimulusFrame.h>
#include <user/stagingBuffer.h>
#include <user/frameAssemblyDesc.h>
#include <user/compute.h>
#include <user/senseApiDesc.h>
#define OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS 1
namespace smo {
namespace stim_buff {
class PcloudStimulusProducer;
class OpenClCollatingAndMeshingEngine
{
public:
explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent);
~OpenClCollatingAndMeshingEngine();
// Non-copyable, movable
OpenClCollatingAndMeshingEngine(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine& operator=(
const OpenClCollatingAndMeshingEngine&) = delete;
OpenClCollatingAndMeshingEngine(
OpenClCollatingAndMeshingEngine&&) = default;
OpenClCollatingAndMeshingEngine& operator=(
OpenClCollatingAndMeshingEngine&&) = default;
bool setup();
void finalize();
typedef std::function<void(bool, StimulusFrame&)>
compactCollateAndMeshFrameReqCbFn;
void compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback);
private:
// Callback function types
typedef std::function<void(cl_int)> compactKernelCbFn;
typedef std::function<void(cl_int)> collateKernelCbFn;
bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback);
bool startCollateKernel(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback);
void compactKernelComplete(bool isFinalizing=false);
void collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing=false);
bool stop();
public:
// Get kernel execution durations in milliseconds
std::chrono::milliseconds getCompactKernelDuration() const;
std::chrono::milliseconds getCollateKernelDuration() const;
// Produce ambience frame from average intensity data
void produceAmbienceStimulusFrame(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
uint32_t nSucceeded);
private:
PcloudStimulusProducer& parent;
// OpenCL infrastructure (managed by ComputeManager)
std::shared_ptr<smo::compute::ComputeDevice> computeDevice;
cl_program slotCompactorProgram;
cl_program collateProgram;
cl_kernel slotCompactorKernel;
cl_kernel collateKernel;
// OpenCL buffers (managed by ComputeManager)
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clAverageIntensityBufferClBuffer;
// Cached cl_mem handles for the device we're using
cl_mem clAssemblyBuffer;
cl_mem clCollationBuffer;
cl_mem clAverageIntensityBuffer;
// State tracking
SpinLock shouldAcceptRequestsLock;
bool shouldAcceptRequests;
bool compactIsRunning;
bool collateIsRunning;
cl_event currentCompactKernelEvent;
cl_event currentCollateKernelEvent;
// Memory tracking
void* assemblyBufferPtr;
size_t assemblyBufferSize;
void* collationBufferPtr;
size_t collationBufferSize;
void* averageIntensityBufferPtr;
size_t averageIntensityBufferSize;
// Mapped buffer pointers (for zero-copy synchronization)
void* mappedAssemblyBuffer;
void* mappedCollationBuffer;
void* mappedAverageIntensityBuffer;
// Frame descriptor (cached from setup)
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
// Callback storage
compactKernelCbFn compactKernelCb;
collateKernelCbFn collateKernelCb;
// Timestamp tracking for kernel execution
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
// Static callbacks for OpenCL events
static void CL_CALLBACK compactKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
static void CL_CALLBACK collateKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
// Private helper methods
bool compileAndPrepareKernel(
const char* kernelSource, size_t kernelSourceLen,
const char* kernelName, cl_program& program, cl_kernel& kernel);
bool compileAndPrepareKernels();
bool setupSlotCompactorsArgs(
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame);
// Generic buffer mapping/unmapping for zero-copy synchronization
bool mapBuffer(
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr);
bool unmapBuffer(cl_mem buffer, void*& mappedPtr);
// Wrapper functions for specific buffers
bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAssemblyBuffer();
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapCollationBuffer();
bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAverageIntensityBuffer();
// Forward declaration for continuation class
class CompactCollateAndMeshFrameReq;
// Unified kernel start function
template<typename SetupArgsFn, typename ValidateBuffersFn>
bool startKernel(
cl_kernel kernel,
cl_event* eventPtr,
SetupArgsFn setupArgsFn,
ValidateBuffersFn validateBuffersFn,
size_t globalWorkSize,
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
const char* kernelName,
bool& isRunning)
{
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers
validateBuffersFn();
// Set up kernel arguments
if (!setupArgsFn()) {
return false;
}
// Enqueue kernel execution
cl_int err = clEnqueueNDRangeKernel(
computeDevice->commandQueue, kernel,
1, nullptr, &globalWorkSize, nullptr,
0, nullptr, eventPtr);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue " << kernelName
<< " kernel: " << err << std::endl;
return false;
}
// Set up callback using static member function
err = clSetEventCallback(
*eventPtr, CL_COMPLETE, eventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
// Force queue flush to ensure event processing and callback invocation
err = clFlush(computeDevice->commandQueue);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to flush queue: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
isRunning = true;
return true;
}
};
} // namespace stim_buff
} // namespace smo
#endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H