LivoxGen1: Use syncCancelerForAsyncWork in producer pipeline

This commit is contained in:
2026-05-29 14:10:45 -04:00
parent 5a9fe12057
commit d788810a05
7 changed files with 507 additions and 442 deletions
@@ -39,7 +39,6 @@ clAverageIntensityBufferClBuffer(nullptr),
clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr),
clAverageIntensityBuffer(nullptr),
shouldAcceptRequests(false),
compactIsRunning(false),
collateIsRunning(false),
currentCompactKernelEvent(nullptr), currentCollateKernelEvent(nullptr),
@@ -64,13 +63,10 @@ OpenClCollatingAndMeshingEngine::~OpenClCollatingAndMeshingEngine()
bool OpenClCollatingAndMeshingEngine::setup()
{
// Defensive check to prevent double-calling
if (!openClCollMeshEngnCanceler.isCancellationRequested())
{
sscl::SpinLock::Guard lock(shouldAcceptRequestsLock);
if (shouldAcceptRequests)
{
throw std::runtime_error(std::string(__func__) + ": setup() called "
"while already set up");
}
throw std::runtime_error(std::string(__func__) + ": setup() called "
"while already set up");
}
if (!smoHooksPtr || !smoHooksPtr->ComputeManager_getDevice)
@@ -202,7 +198,7 @@ bool OpenClCollatingAndMeshingEngine::setup()
clFlush(computeDevice->commandQueue);
clFinish(computeDevice->commandQueue);
shouldAcceptRequests = true;
openClCollMeshEngnCanceler.startAcceptingWork();
return true;
}
@@ -771,11 +767,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
bool OpenClCollatingAndMeshingEngine::stop()
{
// Acquire and release lock tightly around setting the flag
sscl::SpinLock::Guard lock(shouldAcceptRequestsLock);
bool wasAcceptingRequests = shouldAcceptRequests;
shouldAcceptRequests = false;
return wasAcceptingRequests;
return openClCollMeshEngnCanceler.requestStop();
}
void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
@@ -1051,28 +1043,33 @@ public:
void compactCollateAndMeshFrameReq1_doCompact_posted(
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
{
sscl::SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
if (!engine.shouldAcceptRequests)
auto& canceler = engine.openClCollMeshEngnCanceler;
const bool shouldContinue = canceler.execUncancelableSegmentOrAbort(
[context, this]()
{
callOriginalCallback(false);
return;
}
// Record compact kernel start time
engine.compactKernelStartTime =
std::chrono::high_resolution_clock::now();
// Record compact kernel start time
engine.compactKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCompactKernel(
engine.parent.assemblyBuffer,
static_cast<uint32_t>(
context->frameAssemblyResult.nSucceeded.load()),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq2_compactDone_posted,
context.get(), context,
std::placeholders::_1));
bool success = engine.startCompactKernel(
engine.parent.assemblyBuffer,
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq2_compactDone_posted,
context.get(), context,
std::placeholders::_1));
if (!success)
{
engine.compactKernelComplete();
callOriginalCallback(false);
}
});
if (!success)
if (!shouldContinue)
{
engine.compactKernelComplete();
callOriginalCallback(false);
return;
}
@@ -1082,8 +1079,27 @@ public:
std::shared_ptr<CompactCollateAndMeshFrameReq> context,
cl_int compactStatus)
{
sscl::SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
if (!engine.shouldAcceptRequests)
bool compactFailed = false;
auto& canceler = engine.openClCollMeshEngnCanceler;
const bool shouldContinue = canceler.execUncancelableSegmentOrAbort(
[context, this, compactStatus, &compactFailed]()
{
engine.compactKernelComplete();
// Record compact kernel end time
engine.compactKernelEndTime =
std::chrono::high_resolution_clock::now();
// If compact failed, call callback directly with failure
if (compactStatus != CL_SUCCESS)
{
compactFailed = true;
callOriginalCallback(false);
return;
}
});
if (!shouldContinue)
{
/** EXPLANATION:
* We intentionally don't call compactKernelComplete() here because
@@ -1095,16 +1111,7 @@ public:
return;
}
engine.compactKernelComplete();
// Record compact kernel end time
engine.compactKernelEndTime = std::chrono::high_resolution_clock::now();
// If compact failed, call callback directly with failure
if (compactStatus != CL_SUCCESS)
{
callOriginalCallback(false);
return;
}
if (compactFailed) { return; }
#if 0
// Print first 4 bytes of each slot
@@ -1116,36 +1123,39 @@ public:
}
#endif
lock.unlockPrematurely();
context->compactCollateAndMeshFrameReq3_doCollate_posted(context);
}
void compactCollateAndMeshFrameReq3_doCollate_posted(
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
{
sscl::SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
if (!engine.shouldAcceptRequests)
auto& canceler = engine.openClCollMeshEngnCanceler;
const bool shouldContinue = canceler.execUncancelableSegmentOrAbort(
[context, this]()
{
callOriginalCallback(false);
return;
}
// Record collate kernel start time
engine.collateKernelStartTime =
std::chrono::high_resolution_clock::now();
// Record collate kernel start time
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCollateKernel(
context->intensityStimFrame, context->anyAmbienceAttached(),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
context.get(), context,
std::placeholders::_1));
bool success = engine.startCollateKernel(
context->intensityStimFrame, context->anyAmbienceAttached(),
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
context.get(), context,
std::placeholders::_1));
if (!success)
{
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
if (!success)
callOriginalCallback(false);
}
});
if (!shouldContinue)
{
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
callOriginalCallback(false);
return;
}
@@ -1155,16 +1165,6 @@ public:
[[maybe_unused]] std::shared_ptr<CompactCollateAndMeshFrameReq> context,
cl_int collateStatus)
{
sscl::SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
if (!engine.shouldAcceptRequests)
{
/* We intentionally don't call collateKernelComplete() here for the
* same reason as above.
*/
callOriginalCallback(false);
return;
}
/** EXPLANATION:
* The reason we don't call collateKernelComplete before checking
* shouldAcceptRequests is because if shouldAcceptRequests is false, then
@@ -1174,77 +1174,92 @@ public:
* Therefore it's finalize()'s responsibility to ensure that it properly
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
// Produce each attached ambience stimbuff's passband count from
// the per-slot averages the collate kernel staged.
uint32_t nSucceededForAmbience =
context->frameAssemblyResult.nSucceeded.load();
if (context->lightAmbienceProductionDesc.has_value())
auto& canceler = engine.openClCollMeshEngnCanceler;
const bool shouldContinue = canceler.execUncancelableSegmentOrAbort(
[context, this, collateStatus]()
{
engine.produceAmbienceStimulusFrame(
context->lightAmbienceProductionDesc->frame.get(),
context->lightAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
engine.collateKernelComplete(
context->intensityStimFrame, context->anyAmbienceAttached());
if (context->darkAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->darkAmbienceProductionDesc->frame.get(),
context->darkAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
bool success = (collateStatus == CL_SUCCESS);
// Early callback + return pattern
if (!success)
// Produce each attached ambience stimbuff's passband count from
// the per-slot averages the collate kernel staged.
uint32_t nSucceededForAmbience =
context->frameAssemblyResult.nSucceeded.load();
if (context->lightAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->lightAmbienceProductionDesc->frame.get(),
context->lightAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
if (context->darkAmbienceProductionDesc.has_value())
{
engine.produceAmbienceStimulusFrame(
context->darkAmbienceProductionDesc->frame.get(),
context->darkAmbienceProductionDesc->comparator,
nSucceededForAmbience);
}
// Record collate kernel end time
engine.collateKernelEndTime =
std::chrono::high_resolution_clock::now();
bool success = (collateStatus == CL_SUCCESS);
// Early callback + return pattern
if (!success)
{
callOriginalCallback(false);
return;
}
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode);
size_t totalPoints = nSucceeded * pointsPerDgram;
// Count points with intensity greater than or equal to 116
size_t highIntensityCount = 0;
if (context->intensityStimFrame.has_value())
{
StimulusFrame& intensityFrame = context->intensityStimFrame->get();
float* intensityFloats = reinterpret_cast<float*>(intensityFrame.slotDesc.vaddr);
for (size_t i = 0; i < totalPoints; ++i)
{
float intensity = intensityFloats[i];
if (intensity >= 116.0f)
{
++highIntensityCount;
}
}
}
(void)highIntensityCount;
#if 0
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount << std::endl;
#endif
callOriginalCallback(success);
});
if (!shouldContinue)
{
/* We intentionally don't call collateKernelComplete() here for the
* same reason as above.
*/
callOriginalCallback(false);
return;
}
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode);
size_t totalPoints = nSucceeded * pointsPerDgram;
// Count points with intensity greater than or equal to 116
size_t highIntensityCount = 0;
if (context->intensityStimFrame.has_value())
{
StimulusFrame& intensityFrame = context->intensityStimFrame->get();
float* intensityFloats = reinterpret_cast<float*>(intensityFrame.slotDesc.vaddr);
for (size_t i = 0; i < totalPoints; ++i)
{
float intensity = intensityFloats[i];
if (intensity >= 116.0f)
{
++highIntensityCount;
}
}
}
(void)highIntensityCount;
#if 0
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount << std::endl;
#endif
callOriginalCallback(success);
}
};
@@ -1256,8 +1271,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
sscl::cps::Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
sscl::SpinLock::Guard lock(shouldAcceptRequestsLock);
if (!shouldAcceptRequests)
if (openClCollMeshEngnCanceler.isCancellationRequested())
{
callback.callbackFn(false, stimulusFrame);
return;