280b6f7d1c
We modify the semantics/meaning of the ambience stim feature. It now represents the number of frames whose average intensity is below the ambienceLowVal. We can now implement the postrin as the event wherein the number of frames whose intensity <= ambienceLowVal exceeds postrin-interest-threshold.
247 lines
7.5 KiB
C++
247 lines
7.5 KiB
C++
#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
#define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
|
|
#include <boostAsioLinkageFix.h>
|
|
#include <cstdint>
|
|
#include <cstddef>
|
|
#include <memory>
|
|
#include <functional>
|
|
#include <optional>
|
|
#include <iostream>
|
|
#include <stdexcept>
|
|
#include <chrono>
|
|
#define CL_TARGET_OPENCL_VERSION 120
|
|
#include <CL/cl.h>
|
|
#include <asynchronousLoop.h>
|
|
#include <callback.h>
|
|
#include <spinLock.h>
|
|
#include <user/stimulusFrame.h>
|
|
#include <user/stagingBuffer.h>
|
|
#include <user/frameAssemblyDesc.h>
|
|
#include <user/compute.h>
|
|
#include <user/senseApiDesc.h>
|
|
|
|
#define OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS 1
|
|
|
|
namespace smo {
|
|
namespace stim_buff {
|
|
|
|
class PcloudStimulusProducer;
|
|
|
|
class OpenClCollatingAndMeshingEngine
|
|
{
|
|
public:
|
|
explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent);
|
|
~OpenClCollatingAndMeshingEngine();
|
|
|
|
// Non-copyable, movable
|
|
OpenClCollatingAndMeshingEngine(
|
|
const OpenClCollatingAndMeshingEngine&) = delete;
|
|
OpenClCollatingAndMeshingEngine& operator=(
|
|
const OpenClCollatingAndMeshingEngine&) = delete;
|
|
OpenClCollatingAndMeshingEngine(
|
|
OpenClCollatingAndMeshingEngine&&) = default;
|
|
OpenClCollatingAndMeshingEngine& operator=(
|
|
OpenClCollatingAndMeshingEngine&&) = default;
|
|
|
|
bool setup();
|
|
void finalize();
|
|
|
|
typedef std::function<void(bool, StimulusFrame&)>
|
|
compactCollateAndMeshFrameReqCbFn;
|
|
void compactCollateAndMeshFrameReq(
|
|
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
|
|
|
private:
|
|
// Callback function types
|
|
typedef std::function<void(cl_int)> compactKernelCbFn;
|
|
typedef std::function<void(cl_int)> collateKernelCbFn;
|
|
|
|
bool startCompactKernel(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
|
compactKernelCbFn callback);
|
|
bool startCollateKernel(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
collateKernelCbFn callback);
|
|
|
|
void compactKernelComplete(bool isFinalizing=false);
|
|
void collateKernelComplete(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
bool isFinalizing=false);
|
|
bool stop();
|
|
|
|
public:
|
|
// Get kernel execution durations in milliseconds
|
|
std::chrono::milliseconds getCompactKernelDuration() const;
|
|
std::chrono::milliseconds getCollateKernelDuration() const;
|
|
|
|
// Produce ambience frame from average intensity data
|
|
void produceAmbienceStimulusFrame(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
uint32_t nSucceeded);
|
|
|
|
private:
|
|
PcloudStimulusProducer& parent;
|
|
|
|
// OpenCL infrastructure (managed by ComputeManager)
|
|
std::shared_ptr<smo::compute::ComputeDevice> computeDevice;
|
|
cl_program slotCompactorProgram;
|
|
cl_program collateProgram;
|
|
cl_kernel slotCompactorKernel;
|
|
cl_kernel collateKernel;
|
|
|
|
// OpenCL buffers (managed by ComputeManager)
|
|
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
|
|
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
|
|
std::shared_ptr<smo::compute::ClBuffer> clAverageIntensityBufferClBuffer;
|
|
// Cached cl_mem handles for the device we're using
|
|
cl_mem clAssemblyBuffer;
|
|
cl_mem clCollationBuffer;
|
|
cl_mem clAverageIntensityBuffer;
|
|
|
|
// State tracking
|
|
SpinLock shouldAcceptRequestsLock;
|
|
bool shouldAcceptRequests;
|
|
bool compactIsRunning;
|
|
bool collateIsRunning;
|
|
cl_event currentCompactKernelEvent;
|
|
cl_event currentCollateKernelEvent;
|
|
|
|
// Memory tracking
|
|
void* assemblyBufferPtr;
|
|
size_t assemblyBufferSize;
|
|
void* collationBufferPtr;
|
|
size_t collationBufferSize;
|
|
void* averageIntensityBufferPtr;
|
|
size_t averageIntensityBufferSize;
|
|
// Mapped buffer pointers (for zero-copy synchronization)
|
|
void* mappedAssemblyBuffer;
|
|
void* mappedCollationBuffer;
|
|
void* mappedAverageIntensityBuffer;
|
|
|
|
// Frame descriptor (cached from setup)
|
|
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
|
|
|
|
// Callback storage
|
|
compactKernelCbFn compactKernelCb;
|
|
collateKernelCbFn collateKernelCb;
|
|
|
|
// Timestamp tracking for kernel execution
|
|
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
|
|
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
|
|
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
|
|
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
|
|
|
|
// Static callbacks for OpenCL events
|
|
static void CL_CALLBACK compactKernelEventCallback(
|
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
|
static void CL_CALLBACK collateKernelEventCallback(
|
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
|
|
|
// Private helper methods
|
|
bool compileAndPrepareKernel(
|
|
const char* kernelSource, size_t kernelSourceLen,
|
|
const char* kernelName, cl_program& program, cl_kernel& kernel);
|
|
bool compileAndPrepareKernels();
|
|
bool setupSlotCompactorsArgs(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
|
bool setupCollateDgramsArgs(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame);
|
|
|
|
// Generic buffer mapping/unmapping for zero-copy synchronization
|
|
bool mapBuffer(
|
|
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr);
|
|
bool unmapBuffer(cl_mem buffer, void*& mappedPtr);
|
|
|
|
// Wrapper functions for specific buffers
|
|
bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapAssemblyBuffer();
|
|
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapCollationBuffer();
|
|
bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapAverageIntensityBuffer();
|
|
|
|
// Forward declaration for continuation class
|
|
class CompactCollateAndMeshFrameReq;
|
|
|
|
// Unified kernel start function
|
|
template<typename SetupArgsFn, typename ValidateBuffersFn>
|
|
bool startKernel(
|
|
cl_kernel kernel,
|
|
cl_event* eventPtr,
|
|
SetupArgsFn setupArgsFn,
|
|
ValidateBuffersFn validateBuffersFn,
|
|
size_t globalWorkSize,
|
|
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
|
|
const char* kernelName,
|
|
bool& isRunning)
|
|
{
|
|
if (isRunning)
|
|
{
|
|
std::cerr << __func__ << ": already running, call stop() first"
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Validate buffers
|
|
validateBuffersFn();
|
|
|
|
// Set up kernel arguments
|
|
if (!setupArgsFn()) {
|
|
return false;
|
|
}
|
|
|
|
// Enqueue kernel execution
|
|
cl_int err = clEnqueueNDRangeKernel(
|
|
computeDevice->commandQueue, kernel,
|
|
1, nullptr, &globalWorkSize, nullptr,
|
|
0, nullptr, eventPtr);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to enqueue " << kernelName
|
|
<< " kernel: " << err << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Set up callback using static member function
|
|
err = clSetEventCallback(
|
|
*eventPtr, CL_COMPLETE, eventCallback, this);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set event callback: " << err
|
|
<< std::endl;
|
|
clReleaseEvent(*eventPtr);
|
|
*eventPtr = nullptr;
|
|
return false;
|
|
}
|
|
|
|
// Force queue flush to ensure event processing and callback invocation
|
|
err = clFlush(computeDevice->commandQueue);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to flush queue: " << err
|
|
<< std::endl;
|
|
clReleaseEvent(*eventPtr);
|
|
*eventPtr = nullptr;
|
|
return false;
|
|
}
|
|
|
|
isRunning = true;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
} // namespace stim_buff
|
|
} // namespace smo
|
|
|
|
#endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
|