ClCollMeshEngn: Split start into start[Collate|Compact]Kernel()

These prepare each kernel separately. We'll unify them further.
This commit is contained in:
2025-11-09 16:12:10 -04:00
parent 683e107b04
commit b331af4f03
2 changed files with 131 additions and 34 deletions
@@ -220,28 +220,112 @@ void OpenClCollatingAndMeshingEngine::finalize()
frameAssemblyDesc = nullptr;
}
// Static callback for OpenCL event
void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
// Static callback for compact kernel event
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
{
OpenClCollatingAndMeshingEngine* engine =
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
if (!engine || !engine->isRunning || !engine->collateFrameReqCb)
if (!engine || !engine->isRunning || !engine->compactKernelCb)
{ return; }
// Post to io_service to call callback on the correct thread
if (engine->parent.device && engine->parent.device->componentThread)
{
engine->parent.device->componentThread->getIoService().post(
engine->collateFrameReqCb);
engine->compactKernelCb);
}
}
bool OpenClCollatingAndMeshingEngine::start(
// Static callback for collate kernel event
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
{
OpenClCollatingAndMeshingEngine* engine =
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
if (!engine || !engine->isRunning || !engine->collateKernelCb)
{ return; }
// Post to io_service to call callback on the correct thread
if (engine->parent.device && engine->parent.device->componentThread)
{
engine->parent.device->componentThread->getIoService().post(
engine->collateKernelCb);
}
}
bool OpenClCollatingAndMeshingEngine::startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback)
{
if (!isSetup)
{
std::cerr << __func__ << ": engine not set up" << std::endl;
return false;
}
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers match what we set up
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
// Store the caller's callback
compactKernelCb = callback;
// Set up kernel arguments for slotCompactor
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
return false;
}
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
size_t globalWorkSize = 1;
cl_int err = clEnqueueNDRangeKernel(
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
0, nullptr, &currentKernelEvent);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
<< err << std::endl;
return false;
}
// Set up callback using static member function
err = clSetEventCallback(
currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(currentKernelEvent);
currentKernelEvent = nullptr;
return false;
}
isRunning = true;
// startCompactKernel() is synchronous - it returns immediately after setting up kernel execution
// The callback will be invoked when the kernel completes
return true;
}
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback,
uint32_t nSucceeded)
collateKernelCbFn callback)
{
if (!isSetup)
{
@@ -270,37 +354,33 @@ bool OpenClCollatingAndMeshingEngine::start(
}
// Store the caller's callback
collateFrameReqCb = callback;
collateKernelCb = callback;
// Set up kernel arguments for slotCompactor
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
return false;
}
// Set up kernel arguments for collateDgrams
if (!setupCollateDgramsArgs(assemblyBuff)) {
return false;
}
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
size_t globalWorkSize = 1;
// Enqueue collateDgrams kernel execution
// NDRange is nDgramsPerFrame (one work item per slot)
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
frameAssemblyDesc->numSlots);
size_t globalWorkSize = nDgramsPerFrame;
cl_int err = clEnqueueNDRangeKernel(
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr,
0, nullptr, &currentKernelEvent);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: "
<< err << std::endl;
return false;
}
// Set up callback using static member function
// We need to pass 'this' as user_data, but we need a shared_ptr
// For now, we'll use a workaround: store 'this' and use it carefully
// Actually, we should use a different approach - use a shared_ptr wrapper
// But for now, let's use a simpler approach with proper lifetime management
err = clSetEventCallback(
currentKernelEvent, CL_COMPLETE, kernelEventCallback, this);
currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this);
if (err != CL_SUCCESS)
{
@@ -311,11 +391,8 @@ bool OpenClCollatingAndMeshingEngine::start(
return false;
}
// TODO: Set up timeout timer in continuation class
// For now, timeout handling will be in the CollateFrameReq continuation
isRunning = true;
// start() is synchronous - it returns immediately after setting up kernel execution
// startCollateKernel() is synchronous - it returns immediately after setting up kernel execution
// The callback will be invoked when the kernel completes
return true;
}
@@ -555,7 +632,18 @@ void OpenClCollatingAndMeshingEngine::stop()
}
isRunning = false;
collateFrameReqCb = nullptr;
}
void OpenClCollatingAndMeshingEngine::stopCompactKernel()
{
stop();
compactKernelCb = [](){};
}
void OpenClCollatingAndMeshingEngine::stopCollateKernel()
{
stop();
collateKernelCb = [](){};
}
} // namespace stim_buff
@@ -35,13 +35,19 @@ public:
bool setup();
void finalize();
// Callback function type for collateFrameReq
typedef std::function<void()> collateFrameReqCbFn;
// Callback function types
typedef std::function<void()> compactKernelCbFn;
typedef std::function<void()> collateKernelCbFn;
bool start(
bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback);
bool startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback,
uint32_t nSucceeded);
collateKernelCbFn callback);
void stopCompactKernel();
void stopCollateKernel();
void stop();
private:
@@ -76,10 +82,13 @@ private:
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
// Callback storage
collateFrameReqCbFn collateFrameReqCb;
compactKernelCbFn compactKernelCb;
collateKernelCbFn collateKernelCb;
// Static callback for OpenCL event
static void CL_CALLBACK kernelEventCallback(
// Static callbacks for OpenCL events
static void CL_CALLBACK compactKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
static void CL_CALLBACK collateKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);
// Private helper methods