OClCollMeshEngn,PcloudStimProd: port to sscl::co coros

We've now ported the OpenClCollatingAndMeshingEngine and PcloudStimProd::produceFrameReq
portions of the Livox pipeline from sscl::cps continuations to sscl::co coroutines.
This commit is contained in:
2026-05-30 19:32:19 -04:00
parent 1cf1be4194
commit 35eb466a60
7 changed files with 398 additions and 373 deletions
@@ -8,10 +8,10 @@
#include <algorithm>
#include <boost/system/error_code.hpp>
#include <boost/asio/deadline_timer.hpp>
#include <spinscale/cps/asynchronousContinuation.h>
#include <spinscale/cps/asynchronousBridge.h>
#include <spinscale/cps/callback.h>
#include <spinscale/asynchronousLoop.h>
#include <spinscale/co/invokers.h>
#include <spinscale/cps/asynchronousBridge.h>
#include <adapters/opencl/clKernelCompletionAReq.h>
#include <componentThread.h>
#include <user/stimulusFrame.h>
#include <livoxProto1/device.h>
@@ -998,162 +998,135 @@ void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
passbandCountOut = passbandCount;
}
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public sscl::cps::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
sscl::co::ViralNonPostingInvoker<bool>
OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameCReq(
sscl::AsynchronousLoop& frameAssemblyResult,
StimulusFrame& /*stimulusFrame*/,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc)
{
private:
OpenClCollatingAndMeshingEngine& engine;
sscl::AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc;
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc;
public:
CompactCollateAndMeshFrameReq(
OpenClCollatingAndMeshingEngine& engine_,
sscl::AsynchronousLoop& asyncLoop,
StimulusFrame& stimulusFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc_,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc_,
const std::shared_ptr<sscl::ComponentThread>& caller,
sscl::cps::Callback<compactCollateAndMeshFrameReqCbFn> cb)
: sscl::cps::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
caller, cb),
engine(engine_),
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
intensityStimFrame(intensityStimFrame_),
lightAmbienceProductionDesc(std::move(lightAmbienceProductionDesc_)),
darkAmbienceProductionDesc(std::move(darkAmbienceProductionDesc_))
{}
bool anyAmbienceAttached() const
{
return lightAmbienceProductionDesc.has_value()
|| darkAmbienceProductionDesc.has_value();
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
{ co_return false; }
}
public:
void callOriginalCallback(bool success)
{ callOriginalCb(success, std::ref(stimulusFrame)); }
auto& resumeIoContext = parent.device->componentThread->getIoContext();
public:
void compactCollateAndMeshFrameReq1_doCompact_posted(
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded(
frameAssemblyResult.nSucceeded.load(), frameAssemblyResult.nTotal);
bool anyAmbienceAttached = lightAmbienceProductionDesc.has_value()
|| darkAmbienceProductionDesc.has_value();
if (needsCompaction)
{
sscl::SpinLock::Guard guard(engine.openClCollMeshEngnCanceler.s.lock);
if (engine.openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
// compactCollateAndMeshFrameReq1_doCompact_posted
{
callOriginalCallback(false);
return;
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
{ co_return false; }
// Record compact kernel start time
compactKernelStartTime = std::chrono::high_resolution_clock::now();
}
// Record compact kernel start time
engine.compactKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCompactKernel(
engine.parent.assemblyBuffer,
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq2_compactDone_posted,
context.get(), context,
std::placeholders::_1));
if (!success)
cl_int compactStatus = co_await openclBoundary::getClKernelCompletionAReqAwaiter(
resumeIoContext,
[this, &frameAssemblyResult](std::function<void(cl_int)> completionCb)
{
engine.compactKernelComplete();
callOriginalCallback(false);
return;
}
}
bool success = startCompactKernel(
parent.assemblyBuffer,
static_cast<uint32_t>(
frameAssemblyResult.nSucceeded.load()),
std::move(completionCb));
if (!success)
{
compactKernelComplete();
completionCb(CL_INVALID_OPERATION);
}
});
void compactCollateAndMeshFrameReq2_compactDone_posted(
std::shared_ptr<CompactCollateAndMeshFrameReq> context,
cl_int compactStatus)
{
sscl::SpinLock::Guard guard(engine.openClCollMeshEngnCanceler.s.lock);
if (engine.openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
if (compactStatus == CL_INVALID_OPERATION)
{
/** EXPLANATION:
* We intentionally don't call compactKernelComplete() here because
* if shouldAcceptRequests is false, then the caller that called
* finalize() will also be forced to call compactKernelComplete()
* inside of finalize().
*/
callOriginalCallback(false);
return;
co_return false;
}
engine.compactKernelComplete();
// Record compact kernel end time
engine.compactKernelEndTime = std::chrono::high_resolution_clock::now();
// If compact failed, call callback directly with failure
if (compactStatus != CL_SUCCESS)
// compactCollateAndMeshFrameReq2_compactDone_posted
{
callOriginalCallback(false);
return;
}
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
{
/** EXPLANATION:
* We intentionally don't call compactKernelComplete() here because
* if shouldAcceptRequests is false, then the caller that called
* finalize() will also be forced to call compactKernelComplete()
* inside of finalize().
*/
co_return false;
}
compactKernelComplete();
// Record compact kernel end time
compactKernelEndTime = std::chrono::high_resolution_clock::now();
// If compact failed, call callback directly with failure
if (compactStatus != CL_SUCCESS) {
co_return false;
}
#if 0
// Print first 4 bytes of each slot
if (engine.frameAssemblyDesc)
{
for (size_t i = 0; i < engine.frameAssemblyDesc->numSlots; ++i) {
engine.parent.ioUringAssemblyEngine.printSlotBytes(i, 4);
// Print first 4 bytes of each slot
if (frameAssemblyDesc)
{
for (size_t i = 0; i < frameAssemblyDesc->numSlots; ++i) {
parent.ioUringAssemblyEngine.printSlotBytes(i, 4);
}
}
}
#endif
guard.unlockPrematurely();
context->compactCollateAndMeshFrameReq3_doCollate_posted(context);
}
}
void compactCollateAndMeshFrameReq3_doCollate_posted(
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
// compactCollateAndMeshFrameReq3_doCollate_posted
{
sscl::SpinLock::Guard guard(engine.openClCollMeshEngnCanceler.s.lock);
if (engine.openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
{
callOriginalCallback(false);
return;
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked()) {
co_return false;
}
// Record collate kernel start time
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCollateKernel(
context->intensityStimFrame, context->anyAmbienceAttached(),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
context.get(), context,
std::placeholders::_1));
if (!success)
{
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
callOriginalCallback(false);
return;
}
collateKernelStartTime = std::chrono::high_resolution_clock::now();
}
void compactCollateAndMeshFrameReq4_collateDone_maybePosted(
[[maybe_unused]] std::shared_ptr<CompactCollateAndMeshFrameReq> context,
cl_int collateStatus)
cl_int collateStatus = co_await openclBoundary::getClKernelCompletionAReqAwaiter(
resumeIoContext,
[this, intensityStimFrame, anyAmbienceAttached](
std::function<void(cl_int)> completionCb)
{
sscl::SpinLock::Guard guard(engine.openClCollMeshEngnCanceler.s.lock);
if (engine.openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
bool success = startCollateKernel(
intensityStimFrame, anyAmbienceAttached,
std::move(completionCb));
if (!success)
{
collateKernelComplete(
intensityStimFrame, anyAmbienceAttached);
completionCb(CL_INVALID_OPERATION);
}
});
if (collateStatus == CL_INVALID_OPERATION) {
co_return false;
}
// compactCollateAndMeshFrameReq4_collateDone_maybePosted
{
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked()) {
/* We intentionally don't call collateKernelComplete() here for the
* same reason as above.
*/
callOriginalCallback(false);
return;
co_return false;
}
/** EXPLANATION:
@@ -1165,55 +1138,51 @@ public:
* Therefore it's finalize()'s responsibility to ensure that it properly
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
collateKernelComplete(
intensityStimFrame, anyAmbienceAttached);
// Produce each attached ambience stimbuff's passband count from
// the per-slot averages the collate kernel staged.
uint32_t nSucceededForAmbience =
context->frameAssemblyResult.nSucceeded.load();
frameAssemblyResult.nSucceeded.load();
if (context->lightAmbienceProductionDesc.has_value())
if (lightAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->lightAmbienceProductionDesc->frame.get(),
context->lightAmbienceProductionDesc->comparator,
produceAmbienceStimulusFrame(
lightAmbienceProductionDesc->frame.get(),
lightAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
if (context->darkAmbienceProductionDesc.has_value())
if (darkAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->darkAmbienceProductionDesc->frame.get(),
context->darkAmbienceProductionDesc->comparator,
produceAmbienceStimulusFrame(
darkAmbienceProductionDesc->frame.get(),
darkAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
bool success = (collateStatus == CL_SUCCESS);
collateKernelEndTime = std::chrono::high_resolution_clock::now();
// Early callback + return pattern
if (!success)
{
callOriginalCallback(false);
return;
}
bool success = (collateStatus == CL_SUCCESS);
if (!success) { co_return false; }
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
uint32_t nSucceeded = frameAssemblyResult.nSucceeded.load();
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
int returnMode = static_cast<int>(parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode);
size_t totalPoints = nSucceeded * pointsPerDgram;
// Count points with intensity greater than 116
size_t highIntensityCount = 0;
if (context->intensityStimFrame.has_value())
if (intensityStimFrame.has_value())
{
StimulusFrame& intensityFrame = context->intensityStimFrame->get();
float* intensityFloats = reinterpret_cast<float*>(intensityFrame.slotDesc.vaddr);
StimulusFrame& intensityFrame = intensityStimFrame->get();
float* intensityFloats = reinterpret_cast<float*>(
intensityFrame.slotDesc.vaddr);
for (size_t i = 0; i < totalPoints; ++i)
{
float intensity = intensityFloats[i];
@@ -1227,62 +1196,15 @@ public:
#if 0
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< (intensityStimFrame.has_value() ?
intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount << std::endl;
#endif
callOriginalCallback(success);
}
};
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
sscl::AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<AmbienceProductionDesc> lightAmbienceProductionDesc,
std::optional<AmbienceProductionDesc> darkAmbienceProductionDesc,
sscl::cps::Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
sscl::SpinLock::Guard guard(openClCollMeshEngnCanceler.s.lock);
if (openClCollMeshEngnCanceler.isCancellationRequestedUnlocked())
{
callback.callbackFn(false, stimulusFrame);
return;
}
}
auto caller = smoHooksPtr->ComponentThread_getSelf();
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
*this, asyncLoop, stimulusFrame, intensityStimFrame,
std::move(lightAmbienceProductionDesc), std::move(darkAmbienceProductionDesc),
caller,
std::move(callback));
// Check if compaction is needed
bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded(
asyncLoop.nSucceeded.load(), asyncLoop.nTotal);
// Start with compaction if needed, then chain to collation
if (needsCompaction)
{
boost::asio::post(parent.device->componentThread->getIoContext(),
STC(std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq1_doCompact_posted,
request.get(), request)));
}
else
{
// Skip compaction, go straight to collation
boost::asio::post(parent.device->componentThread->getIoContext(),
STC(std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq3_doCollate_posted,
request.get(), request)));
co_return success;
}
}