ClCollMeshEngn: big reworks to clean up.

This commit is contained in:
2025-11-09 19:28:55 -04:00
parent b331af4f03
commit ad0b8058a4
2 changed files with 142 additions and 135 deletions
@@ -26,7 +26,7 @@ isSetup(false),
clAssemblyBuffer(nullptr), clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr), clCollationBuffer(nullptr),
isRunning(false), isRunning(false),
currentKernelEvent(nullptr), currentCompactKernelEvent(nullptr), currentCollateKernelEvent(nullptr),
assemblyBufferPtr(nullptr), assemblyBufferPtr(nullptr),
assemblyBufferSize(0), assemblyBufferSize(0),
collationBufferPtr(nullptr), collationBufferPtr(nullptr),
@@ -139,8 +139,7 @@ bool OpenClCollatingAndMeshingEngine::setup()
} }
// Compile and prepare both kernels // Compile and prepare both kernels
if (!compileAndPrepareKernels()) if (!compileAndPrepareKernels()) {
{
goto cleanup; goto cleanup;
} }
@@ -212,7 +211,8 @@ void OpenClCollatingAndMeshingEngine::finalize()
platform = nullptr; platform = nullptr;
isSetup = false; isSetup = false;
isRunning = false; isRunning = false;
currentKernelEvent = nullptr; currentCompactKernelEvent = nullptr;
currentCollateKernelEvent = nullptr;
assemblyBufferPtr = nullptr; assemblyBufferPtr = nullptr;
assemblyBufferSize = 0; assemblyBufferSize = 0;
collationBufferPtr = nullptr; collationBufferPtr = nullptr;
@@ -222,7 +222,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Static callback for compact kernel event // Static callback for compact kernel event
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback( void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
{ {
OpenClCollatingAndMeshingEngine* engine = OpenClCollatingAndMeshingEngine* engine =
static_cast<OpenClCollatingAndMeshingEngine*>(user_data); static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
@@ -234,13 +234,13 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
if (engine->parent.device && engine->parent.device->componentThread) if (engine->parent.device && engine->parent.device->componentThread)
{ {
engine->parent.device->componentThread->getIoService().post( engine->parent.device->componentThread->getIoService().post(
engine->compactKernelCb); std::bind(engine->compactKernelCb, event_command_exec_status));
} }
} }
// Static callback for collate kernel event // Static callback for collate kernel event
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback( void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data) cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
{ {
OpenClCollatingAndMeshingEngine* engine = OpenClCollatingAndMeshingEngine* engine =
static_cast<OpenClCollatingAndMeshingEngine*>(user_data); static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
@@ -252,7 +252,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
if (engine->parent.device && engine->parent.device->componentThread) if (engine->parent.device && engine->parent.device->componentThread)
{ {
engine->parent.device->componentThread->getIoService().post( engine->parent.device->componentThread->getIoService().post(
engine->collateKernelCb); std::bind(engine->collateKernelCb, event_command_exec_status));
} }
} }
@@ -260,141 +260,74 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded, StagingBuffer& assemblyBuff, uint32_t nSucceeded,
compactKernelCbFn callback) compactKernelCbFn callback)
{ {
if (!isSetup)
{
std::cerr << __func__ << ": engine not set up" << std::endl;
return false;
}
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers match what we set up
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
// Store the caller's callback // Store the caller's callback
compactKernelCb = callback; compactKernelCb = std::move(callback);
// Set up kernel arguments for slotCompactor // Validate buffers callable
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) { auto validateBuffers = [this, &assemblyBuff]() {
return false; struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
} if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
};
// Enqueue slotCompactor kernel execution (single work item for sequential processing) // Setup args callable
size_t globalWorkSize = 1; auto setupArgs = [this, &assemblyBuff, nSucceeded]() {
cl_int err = clEnqueueNDRangeKernel( return setupSlotCompactorsArgs(assemblyBuff, nSucceeded);
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr, };
0, nullptr, &currentKernelEvent);
if (err != CL_SUCCESS) return startKernel(
{ slotCompactorKernel,
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " &currentCompactKernelEvent,
<< err << std::endl; setupArgs,
return false; validateBuffers,
} 1, // globalWorkSize
compactKernelEventCallback,
// Set up callback using static member function "slotCompactor");
err = clSetEventCallback(
currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(currentKernelEvent);
currentKernelEvent = nullptr;
return false;
}
isRunning = true;
// startCompactKernel() is synchronous - it returns immediately after setting up kernel execution
// The callback will be invoked when the kernel completes
return true;
} }
bool OpenClCollatingAndMeshingEngine::startCollateKernel( bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateKernelCbFn callback) collateKernelCbFn callback)
{ {
if (!isSetup)
{
std::cerr << __func__ << ": engine not set up" << std::endl;
return false;
}
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers match what we set up
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
struct iovec collationIov = collationBuff.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
// Store the caller's callback // Store the caller's callback
collateKernelCb = callback; collateKernelCb = std::move(callback);
// Set up kernel arguments for collateDgrams // Validate buffers callable
if (!setupCollateDgramsArgs(assemblyBuff)) { auto validateBuffers = [this, &assemblyBuff, &collationBuff]() {
return false; struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
} struct iovec collationIov = collationBuff.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
};
// Enqueue collateDgrams kernel execution // Setup args callable
// NDRange is nDgramsPerFrame (one work item per slot) auto setupArgs = [this, &assemblyBuff]() {
return setupCollateDgramsArgs(assemblyBuff);
};
// Calculate global work size
uint32_t nDgramsPerFrame = static_cast<uint32_t>( uint32_t nDgramsPerFrame = static_cast<uint32_t>(
frameAssemblyDesc->numSlots); frameAssemblyDesc->numSlots);
size_t globalWorkSize = nDgramsPerFrame; size_t globalWorkSize = nDgramsPerFrame;
cl_int err = clEnqueueNDRangeKernel(
commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr,
0, nullptr, &currentKernelEvent);
if (err != CL_SUCCESS) return startKernel(
{ collateKernel,
std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: " &currentCollateKernelEvent,
<< err << std::endl; setupArgs,
return false; validateBuffers,
} globalWorkSize,
collateKernelEventCallback,
// Set up callback using static member function "collateDgrams");
err = clSetEventCallback(
currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(currentKernelEvent);
currentKernelEvent = nullptr;
return false;
}
isRunning = true;
// startCollateKernel() is synchronous - it returns immediately after setting up kernel execution
// The callback will be invoked when the kernel completes
return true;
} }
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel( bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
@@ -621,14 +554,24 @@ void OpenClCollatingAndMeshingEngine::stop()
} }
// Cancel kernel execution if possible // Cancel kernel execution if possible
if (currentKernelEvent) if (currentCompactKernelEvent)
{ {
// Note: OpenCL doesn't have a standard way to cancel kernel execution // Note: OpenCL doesn't have a standard way to cancel kernel execution
// We can try to wait for it to complete or just release the event // We can try to wait for it to complete or just release the event
// For now, we'll just wait for it to complete // For now, we'll just wait for it to complete
clWaitForEvents(1, &currentKernelEvent); clWaitForEvents(1, &currentCompactKernelEvent);
clReleaseEvent(currentKernelEvent); clReleaseEvent(currentCompactKernelEvent);
currentKernelEvent = nullptr; currentCompactKernelEvent = nullptr;
}
if (currentCollateKernelEvent)
{
// Note: OpenCL doesn't have a standard way to cancel kernel execution
// We can try to wait for it to complete or just release the event
// For now, we'll just wait for it to complete
clWaitForEvents(1, &currentCollateKernelEvent);
clReleaseEvent(currentCollateKernelEvent);
currentCollateKernelEvent = nullptr;
} }
isRunning = false; isRunning = false;
@@ -637,13 +580,13 @@ void OpenClCollatingAndMeshingEngine::stop()
void OpenClCollatingAndMeshingEngine::stopCompactKernel() void OpenClCollatingAndMeshingEngine::stopCompactKernel()
{ {
stop(); stop();
compactKernelCb = [](){}; compactKernelCb = [](cl_int){};
} }
void OpenClCollatingAndMeshingEngine::stopCollateKernel() void OpenClCollatingAndMeshingEngine::stopCollateKernel()
{ {
stop(); stop();
collateKernelCb = [](){}; collateKernelCb = [](cl_int){};
} }
} // namespace stim_buff } // namespace stim_buff
@@ -6,6 +6,8 @@
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>
#include <functional> #include <functional>
#include <iostream>
#include <stdexcept>
#define CL_TARGET_OPENCL_VERSION 300 #define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h> #include <CL/cl.h>
#include "stagingBuffer.h" #include "stagingBuffer.h"
@@ -36,8 +38,8 @@ public:
void finalize(); void finalize();
// Callback function types // Callback function types
typedef std::function<void()> compactKernelCbFn; typedef std::function<void(cl_int)> compactKernelCbFn;
typedef std::function<void()> collateKernelCbFn; typedef std::function<void(cl_int)> collateKernelCbFn;
bool startCompactKernel( bool startCompactKernel(
StagingBuffer& assemblyBuff, uint32_t nSucceeded, StagingBuffer& assemblyBuff, uint32_t nSucceeded,
@@ -70,7 +72,8 @@ private:
// State tracking // State tracking
bool isRunning; bool isRunning;
cl_event currentKernelEvent; cl_event currentCompactKernelEvent;
cl_event currentCollateKernelEvent;
// Memory tracking // Memory tracking
void* assemblyBufferPtr; void* assemblyBufferPtr;
@@ -99,6 +102,67 @@ private:
bool setupSlotCompactorsArgs( bool setupSlotCompactorsArgs(
StagingBuffer& assemblyBuff, uint32_t nSucceeded); StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff); bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
// Unified kernel start function
template<typename SetupArgsFn, typename ValidateBuffersFn>
bool startKernel(
cl_kernel kernel,
cl_event* eventPtr,
SetupArgsFn setupArgsFn,
ValidateBuffersFn validateBuffersFn,
size_t globalWorkSize,
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
const char* kernelName)
{
if (!isSetup)
{
std::cerr << __func__ << ": engine not set up" << std::endl;
return false;
}
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return false;
}
// Validate buffers
validateBuffersFn();
// Set up kernel arguments
if (!setupArgsFn()) {
return false;
}
// Enqueue kernel execution
cl_int err = clEnqueueNDRangeKernel(
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
0, nullptr, eventPtr);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to enqueue " << kernelName
<< " kernel: " << err << std::endl;
return false;
}
// Set up callback using static member function
err = clSetEventCallback(
*eventPtr, CL_COMPLETE, eventCallback, this);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set event callback: " << err
<< std::endl;
clReleaseEvent(*eventPtr);
*eventPtr = nullptr;
return false;
}
isRunning = true;
return true;
}
}; };
} // namespace stim_buff } // namespace stim_buff