ClCollMeshEngn: Split start into start[Collate|Compact]Kernel()
These prepare each kernel separately. We'll unify them further.
This commit is contained in:
@@ -220,28 +220,112 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
frameAssemblyDesc = nullptr;
|
frameAssemblyDesc = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Static callback for OpenCL event
|
// Static callback for compact kernel event
|
||||||
void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
|
||||||
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
|
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
|
||||||
{
|
{
|
||||||
OpenClCollatingAndMeshingEngine* engine =
|
OpenClCollatingAndMeshingEngine* engine =
|
||||||
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
||||||
|
|
||||||
if (!engine || !engine->isRunning || !engine->collateFrameReqCb)
|
if (!engine || !engine->isRunning || !engine->compactKernelCb)
|
||||||
{ return; }
|
{ return; }
|
||||||
|
|
||||||
// Post to io_service to call callback on the correct thread
|
// Post to io_service to call callback on the correct thread
|
||||||
if (engine->parent.device && engine->parent.device->componentThread)
|
if (engine->parent.device && engine->parent.device->componentThread)
|
||||||
{
|
{
|
||||||
engine->parent.device->componentThread->getIoService().post(
|
engine->parent.device->componentThread->getIoService().post(
|
||||||
engine->collateFrameReqCb);
|
engine->compactKernelCb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::start(
|
// Static callback for collate kernel event
|
||||||
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
|
||||||
|
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
|
||||||
|
{
|
||||||
|
OpenClCollatingAndMeshingEngine* engine =
|
||||||
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
||||||
|
|
||||||
|
if (!engine || !engine->isRunning || !engine->collateKernelCb)
|
||||||
|
{ return; }
|
||||||
|
|
||||||
|
// Post to io_service to call callback on the correct thread
|
||||||
|
if (engine->parent.device && engine->parent.device->componentThread)
|
||||||
|
{
|
||||||
|
engine->parent.device->componentThread->getIoService().post(
|
||||||
|
engine->collateKernelCb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
||||||
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
||||||
|
compactKernelCbFn callback)
|
||||||
|
{
|
||||||
|
if (!isSetup)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": engine not set up" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isRunning)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": already running, call stop() first"
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate buffers match what we set up
|
||||||
|
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
||||||
|
|
||||||
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
||||||
|
|| assemblyIov.iov_len != assemblyBufferSize)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the caller's callback
|
||||||
|
compactKernelCb = callback;
|
||||||
|
|
||||||
|
// Set up kernel arguments for slotCompactor
|
||||||
|
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
||||||
|
size_t globalWorkSize = 1;
|
||||||
|
cl_int err = clEnqueueNDRangeKernel(
|
||||||
|
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
|
||||||
|
0, nullptr, ¤tKernelEvent);
|
||||||
|
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
|
||||||
|
<< err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up callback using static member function
|
||||||
|
err = clSetEventCallback(
|
||||||
|
currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this);
|
||||||
|
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set event callback: " << err
|
||||||
|
<< std::endl;
|
||||||
|
clReleaseEvent(currentKernelEvent);
|
||||||
|
currentKernelEvent = nullptr;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
isRunning = true;
|
||||||
|
// startCompactKernel() is synchronous - it returns immediately after setting up kernel execution
|
||||||
|
// The callback will be invoked when the kernel completes
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback,
|
collateKernelCbFn callback)
|
||||||
uint32_t nSucceeded)
|
|
||||||
{
|
{
|
||||||
if (!isSetup)
|
if (!isSetup)
|
||||||
{
|
{
|
||||||
@@ -270,37 +354,33 @@ bool OpenClCollatingAndMeshingEngine::start(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
collateFrameReqCb = callback;
|
collateKernelCb = callback;
|
||||||
|
|
||||||
// Set up kernel arguments for slotCompactor
|
|
||||||
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Set up kernel arguments for collateDgrams
|
// Set up kernel arguments for collateDgrams
|
||||||
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
// Enqueue collateDgrams kernel execution
|
||||||
size_t globalWorkSize = 1;
|
// NDRange is nDgramsPerFrame (one work item per slot)
|
||||||
|
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
|
||||||
|
frameAssemblyDesc->numSlots);
|
||||||
|
|
||||||
|
size_t globalWorkSize = nDgramsPerFrame;
|
||||||
cl_int err = clEnqueueNDRangeKernel(
|
cl_int err = clEnqueueNDRangeKernel(
|
||||||
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
|
commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr,
|
||||||
0, nullptr, ¤tKernelEvent);
|
0, nullptr, ¤tKernelEvent);
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
|
std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: "
|
||||||
<< err << std::endl;
|
<< err << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set up callback using static member function
|
// Set up callback using static member function
|
||||||
// We need to pass 'this' as user_data, but we need a shared_ptr
|
|
||||||
// For now, we'll use a workaround: store 'this' and use it carefully
|
|
||||||
// Actually, we should use a different approach - use a shared_ptr wrapper
|
|
||||||
// But for now, let's use a simpler approach with proper lifetime management
|
|
||||||
err = clSetEventCallback(
|
err = clSetEventCallback(
|
||||||
currentKernelEvent, CL_COMPLETE, kernelEventCallback, this);
|
currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this);
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
@@ -311,11 +391,8 @@ bool OpenClCollatingAndMeshingEngine::start(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Set up timeout timer in continuation class
|
|
||||||
// For now, timeout handling will be in the CollateFrameReq continuation
|
|
||||||
|
|
||||||
isRunning = true;
|
isRunning = true;
|
||||||
// start() is synchronous - it returns immediately after setting up kernel execution
|
// startCollateKernel() is synchronous - it returns immediately after setting up kernel execution
|
||||||
// The callback will be invoked when the kernel completes
|
// The callback will be invoked when the kernel completes
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -555,7 +632,18 @@ void OpenClCollatingAndMeshingEngine::stop()
|
|||||||
}
|
}
|
||||||
|
|
||||||
isRunning = false;
|
isRunning = false;
|
||||||
collateFrameReqCb = nullptr;
|
}
|
||||||
|
|
||||||
|
void OpenClCollatingAndMeshingEngine::stopCompactKernel()
|
||||||
|
{
|
||||||
|
stop();
|
||||||
|
compactKernelCb = [](){};
|
||||||
|
}
|
||||||
|
|
||||||
|
void OpenClCollatingAndMeshingEngine::stopCollateKernel()
|
||||||
|
{
|
||||||
|
stop();
|
||||||
|
collateKernelCb = [](){};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
|
|||||||
@@ -35,13 +35,19 @@ public:
|
|||||||
bool setup();
|
bool setup();
|
||||||
void finalize();
|
void finalize();
|
||||||
|
|
||||||
// Callback function type for collateFrameReq
|
// Callback function types
|
||||||
typedef std::function<void()> collateFrameReqCbFn;
|
typedef std::function<void()> compactKernelCbFn;
|
||||||
|
typedef std::function<void()> collateKernelCbFn;
|
||||||
|
|
||||||
bool start(
|
bool startCompactKernel(
|
||||||
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
||||||
|
compactKernelCbFn callback);
|
||||||
|
bool startCollateKernel(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback,
|
collateKernelCbFn callback);
|
||||||
uint32_t nSucceeded);
|
|
||||||
|
void stopCompactKernel();
|
||||||
|
void stopCollateKernel();
|
||||||
void stop();
|
void stop();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -76,10 +82,13 @@ private:
|
|||||||
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
|
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
|
||||||
|
|
||||||
// Callback storage
|
// Callback storage
|
||||||
collateFrameReqCbFn collateFrameReqCb;
|
compactKernelCbFn compactKernelCb;
|
||||||
|
collateKernelCbFn collateKernelCb;
|
||||||
|
|
||||||
// Static callback for OpenCL event
|
// Static callbacks for OpenCL events
|
||||||
static void CL_CALLBACK kernelEventCallback(
|
static void CL_CALLBACK compactKernelEventCallback(
|
||||||
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
||||||
|
static void CL_CALLBACK collateKernelEventCallback(
|
||||||
cl_event event, cl_int event_command_exec_status, void* user_data);
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
||||||
|
|
||||||
// Private helper methods
|
// Private helper methods
|
||||||
|
|||||||
Reference in New Issue
Block a user