OClCollMeshEngn: Add method compactCollateAndMeshFrameReq

This method takes an input assembly buffer and selects which
OpenCL kernels need to be executed on that buffer to transform
the input data into the eventual output StimulusFrame for the
current timeslice period.
This commit is contained in:
2025-11-10 00:58:48 -04:00
parent 19a79faabe
commit eedeb4b803
2 changed files with 152 additions and 3 deletions
@@ -4,6 +4,12 @@
#include <cstring>
#include <vector>
#include <boost/system/error_code.hpp>
#include <asynchronousContinuation.h>
#include <callback.h>
#include <asynchronousLoop.h>
#include <componentThread.h>
#include <user/stimulusFrame.h>
#include "livoxGen1.h"
#include "openClCollatingAndMeshingEngine.h"
#include "pcloudStimulusBuffer.h"
#include "openClKernels.h"
@@ -234,7 +240,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
{ return; }
// Stop the compact kernel
engine->stopCompactKernel();
// engine->stopCompactKernel();
// Post to io_service to call callback on the correct thread
if (engine->parent.device && engine->parent.device->componentThread)
@@ -255,7 +261,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
{ return; }
// Stop the collate kernel
engine->stopCollateKernel();
// engine->stopCollateKernel();
// Post to io_service to call callback on the correct thread
if (engine->parent.device && engine->parent.device->componentThread)
@@ -592,6 +598,137 @@ void OpenClCollatingAndMeshingEngine::stopCollateKernel()
collateKernelCb = [](cl_int){};
}
/**
 * Continuation state for a single compactCollateAndMeshFrameReq() call.
 *
 * The numbered member functions form a chain:
 *   1. start the compact kernel,
 *   2. handle compact completion,
 *   3. start the collate kernel,
 *   4. handle collate completion.
 * Every terminal path ends in callOriginalCallback(). Each step is handed a
 * shared_ptr to this same object; the copy bound into a kernel callback
 * keeps the object alive for as long as that bound callback exists.
 */
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
{
private:
    using Base =
        PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>;
    using Self = CompactCollateAndMeshFrameReq;
    using SelfPtr = std::shared_ptr<CompactCollateAndMeshFrameReq>;

    // Engine whose kernels and buffers this request drives.
    OpenClCollatingAndMeshingEngine& engine;
    // Initialised from the caller's AsynchronousLoop at construction time;
    // its nSucceeded count is handed to the compact kernel.
    AsynchronousLoop frameAssemblyResult;
    // Frame handed back (by reference) through the original callback.
    StimulusFrame& stimulusFrame;

public:
    CompactCollateAndMeshFrameReq(
        OpenClCollatingAndMeshingEngine& engine_,
        AsynchronousLoop& asyncLoop,
        StimulusFrame& stimulusFrame_,
        const std::shared_ptr<ComponentThread>& caller,
        Callback<compactCollateAndMeshFrameReqCbFn> cb)
    : Base(caller, cb),
    engine(engine_),
    frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_)
    {}

    /** Report the final outcome, together with the frame, to the requester. */
    void callOriginalCallback(bool success)
    { callOriginalCb(success, std::ref(stimulusFrame)); }

    /**
     * Step 1: launch the compact kernel on the parent's assembly buffer.
     * Completion is routed to step 2; a launch failure is reported
     * immediately as failure.
     */
    void compactCollateAndMeshFrameReq1_doCompact_posted(SelfPtr context)
    {
        const auto nSucceeded = static_cast<uint32_t>(
            context->frameAssemblyResult.nSucceeded.load());

        const bool started = engine.startCompactKernel(
            engine.parent.assemblyBuffer, nSucceeded,
            std::bind(
                &Self::compactCollateAndMeshFrameReq2_compactDone_posted,
                context.get(), context,
                std::placeholders::_1));

        if (started) { return; }
        callOriginalCallback(false);
    }

    /**
     * Step 2: compact kernel finished. Stops the kernel, then either reports
     * failure or dumps the slots and moves on to collation.
     */
    void compactCollateAndMeshFrameReq2_compactDone_posted(
        SelfPtr context, cl_int compactStatus)
    {
        engine.stopCompactKernel();

        // A failed compaction ends the request here.
        if (compactStatus != CL_SUCCESS)
        {
            callOriginalCallback(false);
            return;
        }

        // Print the first 4 bytes of every slot (only when a frame assembly
        // descriptor is present).
        if (engine.frameAssemblyDesc)
        {
            size_t slot = 0;
            while (slot < engine.frameAssemblyDesc->numSlots)
            {
                engine.parent.ioUringAssemblyEngine.printSlotBytes(slot, 4);
                ++slot;
            }
        }

        context->compactCollateAndMeshFrameReq3_doCollate_posted(context);
    }

    /**
     * Step 3: launch the collate kernel from the assembly buffer into the
     * collation buffer. Completion is routed to step 4; a launch failure is
     * reported immediately as failure.
     */
    void compactCollateAndMeshFrameReq3_doCollate_posted(SelfPtr context)
    {
        const bool started = engine.startCollateKernel(
            engine.parent.assemblyBuffer, engine.parent.collationBuffer,
            std::bind(
                &Self::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
                context.get(), context,
                std::placeholders::_1));

        if (started) { return; }
        callOriginalCallback(false);
    }

    /**
     * Step 4: collate kernel finished. Stops the kernel and reports success
     * exactly when the kernel status is CL_SUCCESS.
     */
    void compactCollateAndMeshFrameReq4_collateDone_maybePosted(
        [[maybe_unused]] SelfPtr context,
        cl_int collateStatus)
    {
        engine.stopCollateKernel();
        callOriginalCallback(collateStatus == CL_SUCCESS);
    }
};
/**
 * Kick off the compact -> collate sequence for the current frame.
 *
 * A CompactCollateAndMeshFrameReq continuation is created for the calling
 * component thread and its first step is posted to the io service of
 * parent.device->componentThread. The first step is compaction when
 * IoUringAssemblyEngine::compactionIsNeeded() says so for
 * (asyncLoop.nSucceeded, asyncLoop.nTotal); otherwise compaction is skipped
 * and the sequence starts directly at collation.
 *
 * @param asyncLoop      Result counters of the preceding frame assembly.
 * @param stimulusFrame  Frame handed back through the callback.
 * @param callback       Invoked with the overall success flag and the frame.
 *
 * NOTE(review): parent.device and its componentThread are dereferenced
 * without a null check here, unlike in the kernel event callbacks —
 * confirm callers guarantee both are set.
 */
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
    AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
    Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
    auto caller = smoHooksPtr->ComponentThread_getSelf();
    auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
        *this, asyncLoop, stimulusFrame, caller, std::move(callback));

    const bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded(
        asyncLoop.nSucceeded.load(), asyncLoop.nTotal);

    // Entry point of the chain: compaction first when required, otherwise
    // straight to collation. Both steps share the same signature.
    auto firstStep = needsCompaction
        ? &CompactCollateAndMeshFrameReq
            ::compactCollateAndMeshFrameReq1_doCompact_posted
        : &CompactCollateAndMeshFrameReq
            ::compactCollateAndMeshFrameReq3_doCollate_posted;

    parent.device->componentThread->getIoService().post(
        STC(std::bind(firstStep, request.get(), request)));
}
} // namespace stim_buff
} // namespace smo
@@ -10,6 +10,9 @@
#include <stdexcept>
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <asynchronousLoop.h>
#include <callback.h>
#include <user/stimulusFrame.h>
#include "stagingBuffer.h"
#include "frameAssemblyDesc.h"
@@ -40,6 +43,8 @@ public:
// Callback function types
typedef std::function<void(cl_int)> compactKernelCbFn;
typedef std::function<void(cl_int)> collateKernelCbFn;
// Completion callback for compactCollateAndMeshFrameReq(): receives the
// overall success flag and a reference to the StimulusFrame that was
// supplied with the request.
typedef std::function<void(bool, StimulusFrame&)>
compactCollateAndMeshFrameReqCbFn;
bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
@@ -52,6 +57,10 @@ public:
void stopCollateKernel();
void stop();
// Runs the compact kernel (only when IoUringAssemblyEngine::compactionIsNeeded
// reports it for asyncLoop's nSucceeded/nTotal) followed by the collate
// kernel. `callback` is invoked with false if a kernel fails to start or
// completes with a status other than CL_SUCCESS, and with true otherwise;
// `stimulusFrame` is passed back to it by reference.
void compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback);
private:
PcloudStimulusBuffer& parent;
@@ -106,6 +115,9 @@ private:
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
// Forward declaration for continuation class
class CompactCollateAndMeshFrameReq;
// Unified kernel start function
template<typename SetupArgsFn, typename ValidateBuffersFn>
bool startKernel(