322a8137b2
This reverts commit d788810a05.
We're doing this because it's not necessary. We will be porting to
coros soon and we can just use brace-scopes.
296 lines
9.0 KiB
C++
296 lines
9.0 KiB
C++
#ifndef _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
#define _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
|
|
#include <boostAsioLinkageFix.h>
|
|
#include <cstdint>
|
|
#include <cstddef>
|
|
#include <memory>
|
|
#include <type_traits>
|
|
#include <functional>
|
|
#include <optional>
|
|
#include <iostream>
|
|
#include <stdexcept>
|
|
#include <chrono>
|
|
#define CL_TARGET_OPENCL_VERSION 120
|
|
#include <CL/cl.h>
|
|
#include <spinscale/asynchronousLoop.h>
|
|
#include <spinscale/cps/callback.h>
|
|
#include <spinscale/spinLock.h>
|
|
#include <user/stimulusFrame.h>
|
|
#include <user/stagingBuffer.h>
|
|
#include <user/frameAssemblyDesc.h>
|
|
#include <user/compute.h>
|
|
#include <user/senseApiDesc.h>
|
|
#include "pcloudAmbienceQualeIfaceApi.h"
|
|
|
|
#define OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS 1
|
|
|
|
namespace smo {
|
|
namespace stim_buff {
|
|
|
|
/* One "job" per attached ambience stimbuff: the StimulusFrame to write the
|
|
* uint32 passband count into, and the comparator to apply to the per-slot
|
|
* averages the collate kernel staged into averageIntensityBuffer. A job is
|
|
* only constructed when its corresponding ambience stimbuff is attached.
|
|
*/
|
|
struct AmbienceProductionDesc
|
|
{
|
|
std::reference_wrapper<StimulusFrame> frame;
|
|
ParamComparator comparator;
|
|
};
|
|
|
|
// Custom deleters for OpenCL handles
|
|
struct ClProgramDeleter
|
|
{
|
|
void operator()(cl_program prog) const
|
|
{ if (prog) { clReleaseProgram(prog); } }
|
|
};
|
|
|
|
struct ClKernelDeleter
|
|
{
|
|
void operator()(cl_kernel kernel) const
|
|
{ if (kernel) { clReleaseKernel(kernel); } }
|
|
};
|
|
|
|
struct ClEventDeleter
|
|
{
|
|
void operator()(cl_event event) const
|
|
{ if (event) { clReleaseEvent(event); } }
|
|
};
|
|
|
|
// Type aliases for OpenCL handle unique_ptrs
|
|
using ClProgramPtr = std::unique_ptr<
|
|
std::remove_pointer_t<cl_program>, ClProgramDeleter>;
|
|
using ClKernelPtr = std::unique_ptr<
|
|
std::remove_pointer_t<cl_kernel>, ClKernelDeleter>;
|
|
using ClEventPtr = std::unique_ptr<
|
|
std::remove_pointer_t<cl_event>, ClEventDeleter>;
|
|
|
|
class PcloudStimulusProducer;
|
|
|
|
class OpenClCollatingAndMeshingEngine
|
|
{
|
|
public:
|
|
explicit OpenClCollatingAndMeshingEngine(PcloudStimulusProducer& parent);
|
|
~OpenClCollatingAndMeshingEngine();
|
|
|
|
// Non-copyable, movable
|
|
OpenClCollatingAndMeshingEngine(
|
|
const OpenClCollatingAndMeshingEngine&) = delete;
|
|
OpenClCollatingAndMeshingEngine& operator=(
|
|
const OpenClCollatingAndMeshingEngine&) = delete;
|
|
OpenClCollatingAndMeshingEngine(
|
|
OpenClCollatingAndMeshingEngine&&) = default;
|
|
OpenClCollatingAndMeshingEngine& operator=(
|
|
OpenClCollatingAndMeshingEngine&&) = default;
|
|
|
|
bool setup();
|
|
void finalize();
|
|
|
|
typedef std::function<void(bool, StimulusFrame&)>
|
|
compactCollateAndMeshFrameReqCbFn;
|
|
void compactCollateAndMeshFrameReq(
|
|
sscl::AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc,
|
|
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc,
|
|
sscl::cps::Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
|
|
|
private:
|
|
// Callback function types
|
|
typedef std::function<void(cl_int)> compactKernelCbFn;
|
|
typedef std::function<void(cl_int)> collateKernelCbFn;
|
|
|
|
bool startCompactKernel(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
|
compactKernelCbFn callback);
|
|
bool startCollateKernel(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
bool anyAmbienceAttached,
|
|
collateKernelCbFn callback);
|
|
|
|
void compactKernelComplete(bool isFinalizing=false);
|
|
void collateKernelComplete(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
bool anyAmbienceAttached,
|
|
bool isFinalizing=false);
|
|
bool stop();
|
|
|
|
/* Apply `comparator` to the nSucceeded per-slot averages the collate
|
|
* kernel wrote into averageIntensityBuffer, and write the resulting
|
|
* uint32 passband count as the single stimspot of `ambienceFrame`.
|
|
*/
|
|
void produceAmbienceStimulusFrame(
|
|
StimulusFrame& ambienceFrame,
|
|
const ParamComparator& comparator,
|
|
uint32_t nSucceeded);
|
|
|
|
public:
|
|
// Get kernel execution durations in milliseconds
|
|
std::chrono::milliseconds getCompactKernelDuration() const;
|
|
std::chrono::milliseconds getCollateKernelDuration() const;
|
|
|
|
private:
|
|
PcloudStimulusProducer& parent;
|
|
|
|
// OpenCL infrastructure (managed by ComputeManager)
|
|
std::shared_ptr<smo::compute::ComputeDevice> computeDevice;
|
|
ClProgramPtr slotCompactorProgram;
|
|
ClProgramPtr collateProgram;
|
|
ClKernelPtr slotCompactorKernel;
|
|
ClKernelPtr collateKernel;
|
|
|
|
// OpenCL buffers (managed by ComputeManager)
|
|
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
|
|
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
|
|
std::shared_ptr<smo::compute::ClBuffer> clAverageIntensityBufferClBuffer;
|
|
// Cached cl_mem handles for the device we're using
|
|
cl_mem clAssemblyBuffer;
|
|
cl_mem clCollationBuffer;
|
|
cl_mem clAverageIntensityBuffer;
|
|
|
|
// State tracking
|
|
sscl::SpinLock shouldAcceptRequestsLock;
|
|
bool shouldAcceptRequests;
|
|
bool compactIsRunning;
|
|
bool collateIsRunning;
|
|
cl_event currentCompactKernelEvent;
|
|
cl_event currentCollateKernelEvent;
|
|
|
|
// Memory tracking
|
|
void* assemblyBufferPtr;
|
|
size_t assemblyBufferSize;
|
|
void* collationBufferPtr;
|
|
size_t collationBufferSize;
|
|
void* averageIntensityBufferPtr;
|
|
size_t averageIntensityBufferSize;
|
|
// Mapped buffer pointers (for zero-copy synchronization)
|
|
void* mappedAssemblyBuffer;
|
|
void* mappedCollationBuffer;
|
|
void* mappedAverageIntensityBuffer;
|
|
|
|
// Frame descriptor (cached from setup)
|
|
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
|
|
|
|
// Callback storage
|
|
compactKernelCbFn compactKernelCb;
|
|
collateKernelCbFn collateKernelCb;
|
|
|
|
// Timestamp tracking for kernel execution
|
|
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
|
|
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
|
|
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
|
|
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
|
|
|
|
// Static callbacks for OpenCL events
|
|
static void CL_CALLBACK compactKernelEventCallback(
|
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
|
static void CL_CALLBACK collateKernelEventCallback(
|
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
|
|
|
// Private helper methods
|
|
bool compileAndPrepareKernel(
|
|
const std::shared_ptr<smo::compute::ComputeDevice>& computeDevice,
|
|
const char* kernelSource, size_t kernelSourceLen,
|
|
const char* kernelName, ClProgramPtr& program, ClKernelPtr& kernel);
|
|
bool compileAndPrepareKernels(
|
|
const std::shared_ptr<smo::compute::ComputeDevice>& computeDevice,
|
|
ClProgramPtr& slotCompactorProgram, ClProgramPtr& collateProgram,
|
|
ClKernelPtr& slotCompactorKernel, ClKernelPtr& collateKernel);
|
|
bool setupSlotCompactorsArgs(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
|
bool setupCollateDgramsArgs(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
bool anyAmbienceAttached);
|
|
|
|
// Generic buffer mapping/unmapping for zero-copy synchronization
|
|
bool mapBuffer(
|
|
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr);
|
|
bool unmapBuffer(cl_mem buffer, void*& mappedPtr);
|
|
|
|
// Wrapper functions for specific buffers
|
|
bool mapAssemblyBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapAssemblyBuffer();
|
|
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapCollationBuffer();
|
|
bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ);
|
|
bool unmapAverageIntensityBuffer();
|
|
|
|
// Forward declaration for continuation class
|
|
class CompactCollateAndMeshFrameReq;
|
|
|
|
// Unified kernel start function
|
|
template<typename SetupArgsFn, typename ValidateBuffersFn>
|
|
bool startKernel(
|
|
cl_kernel kernel,
|
|
cl_event* eventPtr,
|
|
SetupArgsFn setupArgsFn,
|
|
ValidateBuffersFn validateBuffersFn,
|
|
size_t globalWorkSize,
|
|
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
|
|
const char* kernelName,
|
|
bool& isRunning)
|
|
{
|
|
if (isRunning)
|
|
{
|
|
std::cerr << __func__ << ": already running, call stop() first"
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Validate buffers
|
|
validateBuffersFn();
|
|
|
|
// Set up kernel arguments
|
|
if (!setupArgsFn()) {
|
|
return false;
|
|
}
|
|
|
|
// Enqueue kernel execution
|
|
cl_int err = clEnqueueNDRangeKernel(
|
|
computeDevice->commandQueue, kernel,
|
|
1, nullptr, &globalWorkSize, nullptr,
|
|
0, nullptr, eventPtr);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to enqueue " << kernelName
|
|
<< " kernel: " << err << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Set up callback using static member function
|
|
err = clSetEventCallback(
|
|
*eventPtr, CL_COMPLETE, eventCallback, this);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set event callback: " << err
|
|
<< std::endl;
|
|
clReleaseEvent(*eventPtr);
|
|
*eventPtr = nullptr;
|
|
return false;
|
|
}
|
|
|
|
// Force queue flush to ensure event processing and callback invocation
|
|
err = clFlush(computeDevice->commandQueue);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to flush queue: " << err
|
|
<< std::endl;
|
|
clReleaseEvent(*eventPtr);
|
|
*eventPtr = nullptr;
|
|
return false;
|
|
}
|
|
|
|
isRunning = true;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
} // namespace stim_buff
|
|
} // namespace smo
|
|
|
|
#endif // _LIVOX_GEN1_OPENCL_COLLATING_AND_MESHING_ENGINE_H
|
|
|