Lg1: Implement both light|darkAmbience stimBuffs & their production

We now produce both light and dark ambience stimframes into
stimbuffs for the LivoxGen1 lidar devices.
This commit is contained in:
2026-04-18 14:54:14 -04:00
parent 632a227985
commit 27a5d48451
15 changed files with 668 additions and 336 deletions
@@ -35,8 +35,10 @@ OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(
computeDevice(nullptr),
clAssemblyBufferClBuffer(nullptr),
clCollationBufferClBuffer(nullptr),
clAverageIntensityBufferClBuffer(nullptr),
clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr),
clAverageIntensityBuffer(nullptr),
shouldAcceptRequests(false),
compactIsRunning(false),
collateIsRunning(false),
@@ -45,8 +47,11 @@ assemblyBufferPtr(nullptr),
assemblyBufferSize(0),
collationBufferPtr(nullptr),
collationBufferSize(0),
averageIntensityBufferPtr(nullptr),
averageIntensityBufferSize(0),
mappedAssemblyBuffer(nullptr),
mappedCollationBuffer(nullptr),
mappedAverageIntensityBuffer(nullptr),
frameAssemblyDesc(nullptr)
{
}
@@ -85,11 +90,15 @@ bool OpenClCollatingAndMeshingEngine::setup()
// Get StagingBuffer memory pointers from parent
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
assemblyBufferPtr = assemblyIov.iov_base;
assemblyBufferSize = assemblyIov.iov_len;
collationBufferPtr = collationIov.iov_base;
collationBufferSize = collationIov.iov_len;
averageIntensityBufferPtr = averageIntensityIov.iov_base;
averageIntensityBufferSize = averageIntensityIov.iov_len;
// Get FrameAssemblyDesc from assembly buffer
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
@@ -131,13 +140,33 @@ bool OpenClCollatingAndMeshingEngine::setup()
return false;
}
/* CL_MEM_WRITE_ONLY describes *kernel* access: the collate kernel only
* writes per-slot averages, never reads them. Host-side reads in
* produceAmbienceStimulusFrame use the host backing store directly, after
* collateKernelComplete has mapped and unmapped the buffer to sync the
* kernel's writes; host mapping is independent of this flag.
*/
auto wip_clAverageIntensityBufferClBuffer = smoHooksPtr
->ComputeManager_createUseHostPtrBuffer(
averageIntensityBufferPtr, averageIntensityBufferSize,
CL_MEM_WRITE_ONLY);
if (!wip_clAverageIntensityBufferClBuffer)
{
std::cerr << __func__ << ": failed to create average intensity buffer"
<< std::endl;
return false;
}
// Cache cl_mem handles for the device we're using
cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clAverageIntensityBuffer = wip_clAverageIntensityBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer)
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer
|| !wip_clAverageIntensityBuffer)
{
std::cerr << __func__ << ": failed to get buffer handles for device"
<< std::endl;
@@ -162,8 +191,10 @@ bool OpenClCollatingAndMeshingEngine::setup()
computeDevice = wip_computeDevice;
clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer;
clCollationBufferClBuffer = wip_clCollationBufferClBuffer;
clAverageIntensityBufferClBuffer = wip_clAverageIntensityBufferClBuffer;
clAssemblyBuffer = wip_clAssemblyBuffer;
clCollationBuffer = wip_clCollationBuffer;
clAverageIntensityBuffer = wip_clAverageIntensityBuffer;
slotCompactorProgram = std::move(wip_slotCompactorProgram);
collateProgram = std::move(wip_collateProgram);
slotCompactorKernel = std::move(wip_slotCompactorKernel);
@@ -184,7 +215,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Complete any running kernels
if (compactIsRunning) { compactKernelComplete(true); }
if (collateIsRunning) {
collateKernelComplete(std::nullopt, std::nullopt, true);
collateKernelComplete(std::nullopt, false, true);
}
{
@@ -227,6 +258,12 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
{
if (clAverageIntensityBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
clAverageIntensityBufferClBuffer);
clAverageIntensityBufferClBuffer.reset();
}
if (clCollationBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
@@ -242,6 +279,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
}
// Reset cached cl_mem handles
clAverageIntensityBuffer = nullptr;
clCollationBuffer = nullptr;
clAssemblyBuffer = nullptr;
@@ -268,6 +306,8 @@ void OpenClCollatingAndMeshingEngine::finalize()
assemblyBufferSize = 0;
collationBufferPtr = nullptr;
collationBufferSize = 0;
averageIntensityBufferPtr = nullptr;
averageIntensityBufferSize = 0;
frameAssemblyDesc = nullptr;
}
@@ -364,7 +404,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool anyAmbienceAttached,
collateKernelCbFn callback)
{
// Store the caller's callback
@@ -374,11 +414,15 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
auto validateBuffers = [this]() {
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize)
|| collationIov.iov_len != collationBufferSize
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
@@ -386,9 +430,9 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
};
// Setup args callable
auto setupArgs = [this, intensityStimFrame, ambienceStimFrame]()
auto setupArgs = [this, intensityStimFrame, anyAmbienceAttached]()
{
return setupCollateDgramsArgs(intensityStimFrame, ambienceStimFrame);
return setupCollateDgramsArgs(intensityStimFrame, anyAmbienceAttached);
};
/** EXPLANATION:
@@ -439,27 +483,17 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
}
// Map/unmap ambience stim frame buffer (collate writes per-slot averages here)
if (ambienceStimFrame.has_value())
// Map/unmap average intensity staging buffer (collate writes per-slot
// averages here when any ambience stimbuff is attached).
if (anyAmbienceAttached)
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
if (!mapAverageIntensityBuffer(CL_MAP_WRITE_INVALIDATE_REGION))
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
std::cerr << __func__ << ": failed to map average intensity buffer"
<< std::endl;
return false;
}
unmapAverageIntensityBuffer();
}
// Calculate global work size (just num slots in the frame)
@@ -630,7 +664,7 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
bool anyAmbienceAttached)
{
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(
@@ -684,26 +718,20 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
// Set ambience buffer argument (arg 3): acquired PcloudAmbience StimulusFrame
cl_mem ambienceClBufferArg = nullptr;
if (ambienceStimFrame.has_value())
// Set ambience buffer argument (arg 3): per-slot average intensity
// staging buffer. Set when any ambience stimbuff is attached.
cl_mem averageIntensityClBufferArg =
anyAmbienceAttached ? clAverageIntensityBuffer : nullptr;
const size_t needBytes = static_cast<size_t>(nDgramsPerFrame)
* sizeof(float);
if (anyAmbienceAttached && averageIntensityBufferSize < needBytes)
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
const size_t needBytes = static_cast<size_t>(nDgramsPerFrame)
* sizeof(float);
if (ambienceFrame.slotDesc.nBytes < needBytes)
{
std::cerr << __func__ << ": ambience stim frame slot too small: "
<< ambienceFrame.slotDesc.nBytes << " < " << needBytes
<< std::endl;
return false;
}
ambienceClBufferArg = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
std::cerr << __func__ << ": average intensity buffer too small: "
<< averageIntensityBufferSize << " < " << needBytes << std::endl;
return false;
}
err = clSetKernelArg(
collateKernel.get(), 3, sizeof(cl_mem), &ambienceClBufferArg);
collateKernel.get(), 3, sizeof(cl_mem), &averageIntensityClBufferArg);
if (err != CL_SUCCESS)
{
@@ -782,7 +810,7 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool anyAmbienceAttached,
bool isFinalizing)
{
cl_map_flags mapFlags;
@@ -816,22 +844,12 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
}
}
// Sync GPU writes into ambience stim frame host backing store
if (ambienceStimFrame.has_value())
// Sync GPU writes into average intensity staging buffer host backing
// store so attached ambience stimbuffs can read the per-slot averages.
if (anyAmbienceAttached)
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
if (mapAverageIntensityBuffer(mapFlags)) {
unmapAverageIntensityBuffer();
}
}
@@ -956,6 +974,39 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
return true;
}
/** Maps the average intensity staging buffer.
 *
 * Forwards to mapBuffer() using the cached cl_mem handle
 * (clAverageIntensityBuffer) and the cached byte size
 * (averageIntensityBufferSize). mappedAverageIntensityBuffer is handed to
 * mapBuffer() as its last argument.
 *
 * @param mapFlags OpenCL map flags, passed through unchanged.
 * @return The result reported by mapBuffer().
 */
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
	cl_map_flags mapFlags)
{
	const bool mapped = mapBuffer(
		clAverageIntensityBuffer, averageIntensityBufferSize, mapFlags,
		mappedAverageIntensityBuffer);

	return mapped;
}
/** Unmaps the average intensity staging buffer.
 *
 * Hands the cached cl_mem handle and the currently recorded mapping to
 * unmapBuffer(), then clears the recorded mapping.
 *
 * @return Always true; any result of unmapBuffer() is not consulted.
 */
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
{
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
// Forget the mapping so a stale pointer is not kept around.
mappedAverageIntensityBuffer = nullptr;
return true;
}
/** Counts the per-slot averages accepted by a comparator and publishes the
 * count into an ambience stimulus frame.
 *
 * Reads floats from the host side of the average intensity staging buffer
 * (averageIntensityBufferPtr) and stores, as a uint32_t at
 * ambienceFrame.slotDesc.vaddr, how many of them satisfy comparator().
 *
 * The number of values examined is nSucceeded, clamped to the number of
 * floats that fit in averageIntensityBufferSize, so a count larger than the
 * staging buffer (or a missing staging buffer) can never cause an
 * out-of-bounds read. A frame without a backing address is reported and
 * left untouched.
 *
 * @param ambienceFrame Frame whose slot receives the passband count.
 * @param comparator Predicate applied to each per-slot average.
 * @param nSucceeded Number of leading averages to examine.
 */
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
	StimulusFrame& ambienceFrame, const ParamComparator& comparator,
	uint32_t nSucceeded)
{
	uint32_t* const passbandCountOut =
		reinterpret_cast<uint32_t*>(ambienceFrame.slotDesc.vaddr);
	if (passbandCountOut == nullptr)
	{
		std::cerr << __func__ << ": ambience stim frame has no backing store"
			<< std::endl;
		return;
	}

	const float* const averages =
		static_cast<const float*>(averageIntensityBufferPtr);

	// Without a staging buffer there is nothing to scan; the count is 0.
	const size_t capacity = (averages != nullptr)
		? averageIntensityBufferSize / sizeof(float)
		: 0;

	uint32_t nToScan = nSucceeded;
	if (static_cast<size_t>(nSucceeded) > capacity)
	{
		std::cerr << __func__ << ": nSucceeded exceeds average intensity "
			<< "buffer capacity: " << nSucceeded << " > " << capacity
			<< std::endl;
		// capacity < nSucceeded here, so it fits in uint32_t.
		nToScan = static_cast<uint32_t>(capacity);
	}

	uint32_t passbandCount = 0;
	for (uint32_t i = 0; i < nToScan; ++i)
	{
		if (comparator(averages[i])) { ++passbandCount; }
	}

	*passbandCountOut = passbandCount;
}
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
{
@@ -964,7 +1015,8 @@ private:
sscl::AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc;
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc;
public:
CompactCollateAndMeshFrameReq(
@@ -972,7 +1024,8 @@ public:
sscl::AsynchronousLoop& asyncLoop,
StimulusFrame& stimulusFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame_,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc_,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc_,
const std::shared_ptr<sscl::ComponentThread>& caller,
sscl::Callback<compactCollateAndMeshFrameReqCbFn> cb)
: sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
@@ -980,9 +1033,16 @@ public:
engine(engine_),
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
intensityStimFrame(intensityStimFrame_),
ambienceStimFrame(ambienceStimFrame_)
lightAmbienceProductionDesc(std::move(lightAmbienceProductionDesc_)),
darkAmbienceProductionDesc(std::move(darkAmbienceProductionDesc_))
{}
/** Reports whether this request carries any ambience production work.
 *
 * @return true when the light or the dark ambience production descriptor
 *	holds a value, false when both are empty.
 */
bool anyAmbienceAttached() const
{
	if (lightAmbienceProductionDesc.has_value()) { return true; }
	return darkAmbienceProductionDesc.has_value();
}
public:
/** Forwards the outcome of this request to callOriginalCb().
 *
 * @param success Outcome flag passed through unchanged, together with a
 *	std::ref wrapper around this request's stimulusFrame.
 */
void callOriginalCallback(bool success)
{ callOriginalCb(success, std::ref(stimulusFrame)); }
@@ -1074,7 +1134,7 @@ public:
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCollateKernel(
context->intensityStimFrame, context->ambienceStimFrame,
context->intensityStimFrame, context->anyAmbienceAttached(),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
@@ -1084,7 +1144,7 @@ public:
if (!success)
{
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
context->intensityStimFrame, context->anyAmbienceAttached());
callOriginalCallback(false);
return;
@@ -1115,7 +1175,28 @@ public:
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
context->intensityStimFrame, context->anyAmbienceAttached());
// Produce each attached ambience stimbuff's passband count from
// the per-slot averages the collate kernel staged.
uint32_t nSucceededForAmbience =
context->frameAssemblyResult.nSucceeded.load();
if (context->lightAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->lightAmbienceProductionDesc->frame.get(),
context->lightAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
if (context->darkAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->darkAmbienceProductionDesc->frame.get(),
context->darkAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
@@ -1154,13 +1235,9 @@ public:
(void)highIntensityCount;
#if 0
// Legacy debug: ambience floats live in ambienceStimFrame after collate
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", ambienceRingBufferIndex="
<< (context->ambienceStimFrame.has_value() ?
context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
@@ -1174,7 +1251,8 @@ public:
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
sscl::AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc,
sscl::Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
@@ -1188,7 +1266,8 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
auto caller = smoHooksPtr->ComponentThread_getSelf();
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
*this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame,
*this, asyncLoop, stimulusFrame, intensityStimFrame,
std::move(lightAmbienceProductionDesc), std::move(darkAmbienceProductionDesc),
caller,
std::move(callback));