ClCollMeshEngn: big reworks to clean up.
This commit is contained in:
@@ -26,7 +26,7 @@ isSetup(false),
|
|||||||
clAssemblyBuffer(nullptr),
|
clAssemblyBuffer(nullptr),
|
||||||
clCollationBuffer(nullptr),
|
clCollationBuffer(nullptr),
|
||||||
isRunning(false),
|
isRunning(false),
|
||||||
currentKernelEvent(nullptr),
|
currentCompactKernelEvent(nullptr), currentCollateKernelEvent(nullptr),
|
||||||
assemblyBufferPtr(nullptr),
|
assemblyBufferPtr(nullptr),
|
||||||
assemblyBufferSize(0),
|
assemblyBufferSize(0),
|
||||||
collationBufferPtr(nullptr),
|
collationBufferPtr(nullptr),
|
||||||
@@ -139,8 +139,7 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Compile and prepare both kernels
|
// Compile and prepare both kernels
|
||||||
if (!compileAndPrepareKernels())
|
if (!compileAndPrepareKernels()) {
|
||||||
{
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -212,7 +211,8 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
platform = nullptr;
|
platform = nullptr;
|
||||||
isSetup = false;
|
isSetup = false;
|
||||||
isRunning = false;
|
isRunning = false;
|
||||||
currentKernelEvent = nullptr;
|
currentCompactKernelEvent = nullptr;
|
||||||
|
currentCollateKernelEvent = nullptr;
|
||||||
assemblyBufferPtr = nullptr;
|
assemblyBufferPtr = nullptr;
|
||||||
assemblyBufferSize = 0;
|
assemblyBufferSize = 0;
|
||||||
collationBufferPtr = nullptr;
|
collationBufferPtr = nullptr;
|
||||||
@@ -222,7 +222,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
|
|
||||||
// Static callback for compact kernel event
|
// Static callback for compact kernel event
|
||||||
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
|
||||||
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
|
cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
|
||||||
{
|
{
|
||||||
OpenClCollatingAndMeshingEngine* engine =
|
OpenClCollatingAndMeshingEngine* engine =
|
||||||
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
||||||
@@ -234,13 +234,13 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
|
|||||||
if (engine->parent.device && engine->parent.device->componentThread)
|
if (engine->parent.device && engine->parent.device->componentThread)
|
||||||
{
|
{
|
||||||
engine->parent.device->componentThread->getIoService().post(
|
engine->parent.device->componentThread->getIoService().post(
|
||||||
engine->compactKernelCb);
|
std::bind(engine->compactKernelCb, event_command_exec_status));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Static callback for collate kernel event
|
// Static callback for collate kernel event
|
||||||
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
|
||||||
cl_event /*event*/, cl_int /*event_command_exec_status*/, void* user_data)
|
cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
|
||||||
{
|
{
|
||||||
OpenClCollatingAndMeshingEngine* engine =
|
OpenClCollatingAndMeshingEngine* engine =
|
||||||
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
||||||
@@ -252,7 +252,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
|
|||||||
if (engine->parent.device && engine->parent.device->componentThread)
|
if (engine->parent.device && engine->parent.device->componentThread)
|
||||||
{
|
{
|
||||||
engine->parent.device->componentThread->getIoService().post(
|
engine->parent.device->componentThread->getIoService().post(
|
||||||
engine->collateKernelCb);
|
std::bind(engine->collateKernelCb, event_command_exec_status));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -260,141 +260,74 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
|||||||
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
||||||
compactKernelCbFn callback)
|
compactKernelCbFn callback)
|
||||||
{
|
{
|
||||||
if (!isSetup)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": engine not set up" << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isRunning)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": already running, call stop() first"
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate buffers match what we set up
|
|
||||||
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
|
||||||
|
|
||||||
if (assemblyIov.iov_base != assemblyBufferPtr
|
|
||||||
|| assemblyIov.iov_len != assemblyBufferSize)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(
|
|
||||||
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
compactKernelCb = callback;
|
compactKernelCb = std::move(callback);
|
||||||
|
|
||||||
// Set up kernel arguments for slotCompactor
|
// Validate buffers callable
|
||||||
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
auto validateBuffers = [this, &assemblyBuff]() {
|
||||||
return false;
|
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
||||||
}
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
||||||
|
|| assemblyIov.iov_len != assemblyBufferSize)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
// Setup args callable
|
||||||
size_t globalWorkSize = 1;
|
auto setupArgs = [this, &assemblyBuff, nSucceeded]() {
|
||||||
cl_int err = clEnqueueNDRangeKernel(
|
return setupSlotCompactorsArgs(assemblyBuff, nSucceeded);
|
||||||
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
|
};
|
||||||
0, nullptr, ¤tKernelEvent);
|
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
return startKernel(
|
||||||
{
|
slotCompactorKernel,
|
||||||
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
|
¤tCompactKernelEvent,
|
||||||
<< err << std::endl;
|
setupArgs,
|
||||||
return false;
|
validateBuffers,
|
||||||
}
|
1, // globalWorkSize
|
||||||
|
compactKernelEventCallback,
|
||||||
// Set up callback using static member function
|
"slotCompactor");
|
||||||
err = clSetEventCallback(
|
|
||||||
currentKernelEvent, CL_COMPLETE, compactKernelEventCallback, this);
|
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set event callback: " << err
|
|
||||||
<< std::endl;
|
|
||||||
clReleaseEvent(currentKernelEvent);
|
|
||||||
currentKernelEvent = nullptr;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
isRunning = true;
|
|
||||||
// startCompactKernel() is synchronous - it returns immediately after setting up kernel execution
|
|
||||||
// The callback will be invoked when the kernel completes
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateKernelCbFn callback)
|
collateKernelCbFn callback)
|
||||||
{
|
{
|
||||||
if (!isSetup)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": engine not set up" << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isRunning)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": already running, call stop() first"
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate buffers match what we set up
|
|
||||||
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
|
||||||
struct iovec collationIov = collationBuff.getClEngineIovec();
|
|
||||||
|
|
||||||
if (assemblyIov.iov_base != assemblyBufferPtr
|
|
||||||
|| assemblyIov.iov_len != assemblyBufferSize
|
|
||||||
|| collationIov.iov_base != collationBufferPtr
|
|
||||||
|| collationIov.iov_len != collationBufferSize)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(
|
|
||||||
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
collateKernelCb = callback;
|
collateKernelCb = std::move(callback);
|
||||||
|
|
||||||
// Set up kernel arguments for collateDgrams
|
// Validate buffers callable
|
||||||
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
auto validateBuffers = [this, &assemblyBuff, &collationBuff]() {
|
||||||
return false;
|
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
||||||
}
|
struct iovec collationIov = collationBuff.getClEngineIovec();
|
||||||
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
||||||
|
|| assemblyIov.iov_len != assemblyBufferSize
|
||||||
|
|| collationIov.iov_base != collationBufferPtr
|
||||||
|
|| collationIov.iov_len != collationBufferSize)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Enqueue collateDgrams kernel execution
|
// Setup args callable
|
||||||
// NDRange is nDgramsPerFrame (one work item per slot)
|
auto setupArgs = [this, &assemblyBuff]() {
|
||||||
|
return setupCollateDgramsArgs(assemblyBuff);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate global work size
|
||||||
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
|
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
|
||||||
frameAssemblyDesc->numSlots);
|
frameAssemblyDesc->numSlots);
|
||||||
|
|
||||||
size_t globalWorkSize = nDgramsPerFrame;
|
size_t globalWorkSize = nDgramsPerFrame;
|
||||||
cl_int err = clEnqueueNDRangeKernel(
|
|
||||||
commandQueue, collateKernel, 1, nullptr, &globalWorkSize, nullptr,
|
|
||||||
0, nullptr, ¤tKernelEvent);
|
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
return startKernel(
|
||||||
{
|
collateKernel,
|
||||||
std::cerr << __func__ << ": failed to enqueue collateDgrams kernel: "
|
¤tCollateKernelEvent,
|
||||||
<< err << std::endl;
|
setupArgs,
|
||||||
return false;
|
validateBuffers,
|
||||||
}
|
globalWorkSize,
|
||||||
|
collateKernelEventCallback,
|
||||||
// Set up callback using static member function
|
"collateDgrams");
|
||||||
err = clSetEventCallback(
|
|
||||||
currentKernelEvent, CL_COMPLETE, collateKernelEventCallback, this);
|
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set event callback: " << err
|
|
||||||
<< std::endl;
|
|
||||||
clReleaseEvent(currentKernelEvent);
|
|
||||||
currentKernelEvent = nullptr;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
isRunning = true;
|
|
||||||
// startCollateKernel() is synchronous - it returns immediately after setting up kernel execution
|
|
||||||
// The callback will be invoked when the kernel completes
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
||||||
@@ -621,14 +554,24 @@ void OpenClCollatingAndMeshingEngine::stop()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Cancel kernel execution if possible
|
// Cancel kernel execution if possible
|
||||||
if (currentKernelEvent)
|
if (currentCompactKernelEvent)
|
||||||
{
|
{
|
||||||
// Note: OpenCL doesn't have a standard way to cancel kernel execution
|
// Note: OpenCL doesn't have a standard way to cancel kernel execution
|
||||||
// We can try to wait for it to complete or just release the event
|
// We can try to wait for it to complete or just release the event
|
||||||
// For now, we'll just wait for it to complete
|
// For now, we'll just wait for it to complete
|
||||||
clWaitForEvents(1, ¤tKernelEvent);
|
clWaitForEvents(1, ¤tCompactKernelEvent);
|
||||||
clReleaseEvent(currentKernelEvent);
|
clReleaseEvent(currentCompactKernelEvent);
|
||||||
currentKernelEvent = nullptr;
|
currentCompactKernelEvent = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentCollateKernelEvent)
|
||||||
|
{
|
||||||
|
// Note: OpenCL doesn't have a standard way to cancel kernel execution
|
||||||
|
// We can try to wait for it to complete or just release the event
|
||||||
|
// For now, we'll just wait for it to complete
|
||||||
|
clWaitForEvents(1, ¤tCollateKernelEvent);
|
||||||
|
clReleaseEvent(currentCollateKernelEvent);
|
||||||
|
currentCollateKernelEvent = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
isRunning = false;
|
isRunning = false;
|
||||||
@@ -637,13 +580,13 @@ void OpenClCollatingAndMeshingEngine::stop()
|
|||||||
void OpenClCollatingAndMeshingEngine::stopCompactKernel()
|
void OpenClCollatingAndMeshingEngine::stopCompactKernel()
|
||||||
{
|
{
|
||||||
stop();
|
stop();
|
||||||
compactKernelCb = [](){};
|
compactKernelCb = [](cl_int){};
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::stopCollateKernel()
|
void OpenClCollatingAndMeshingEngine::stopCollateKernel()
|
||||||
{
|
{
|
||||||
stop();
|
stop();
|
||||||
collateKernelCb = [](){};
|
collateKernelCb = [](cl_int){};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
|
|||||||
@@ -6,6 +6,8 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdexcept>
|
||||||
#define CL_TARGET_OPENCL_VERSION 300
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include "stagingBuffer.h"
|
#include "stagingBuffer.h"
|
||||||
@@ -36,8 +38,8 @@ public:
|
|||||||
void finalize();
|
void finalize();
|
||||||
|
|
||||||
// Callback function types
|
// Callback function types
|
||||||
typedef std::function<void()> compactKernelCbFn;
|
typedef std::function<void(cl_int)> compactKernelCbFn;
|
||||||
typedef std::function<void()> collateKernelCbFn;
|
typedef std::function<void(cl_int)> collateKernelCbFn;
|
||||||
|
|
||||||
bool startCompactKernel(
|
bool startCompactKernel(
|
||||||
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
||||||
@@ -70,7 +72,8 @@ private:
|
|||||||
|
|
||||||
// State tracking
|
// State tracking
|
||||||
bool isRunning;
|
bool isRunning;
|
||||||
cl_event currentKernelEvent;
|
cl_event currentCompactKernelEvent;
|
||||||
|
cl_event currentCollateKernelEvent;
|
||||||
|
|
||||||
// Memory tracking
|
// Memory tracking
|
||||||
void* assemblyBufferPtr;
|
void* assemblyBufferPtr;
|
||||||
@@ -99,6 +102,67 @@ private:
|
|||||||
bool setupSlotCompactorsArgs(
|
bool setupSlotCompactorsArgs(
|
||||||
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
||||||
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
|
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
|
||||||
|
|
||||||
|
// Unified kernel start function
|
||||||
|
template<typename SetupArgsFn, typename ValidateBuffersFn>
|
||||||
|
bool startKernel(
|
||||||
|
cl_kernel kernel,
|
||||||
|
cl_event* eventPtr,
|
||||||
|
SetupArgsFn setupArgsFn,
|
||||||
|
ValidateBuffersFn validateBuffersFn,
|
||||||
|
size_t globalWorkSize,
|
||||||
|
void (CL_CALLBACK *eventCallback)(cl_event, cl_int, void*),
|
||||||
|
const char* kernelName)
|
||||||
|
{
|
||||||
|
if (!isSetup)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": engine not set up" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isRunning)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": already running, call stop() first"
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate buffers
|
||||||
|
validateBuffersFn();
|
||||||
|
|
||||||
|
// Set up kernel arguments
|
||||||
|
if (!setupArgsFn()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue kernel execution
|
||||||
|
cl_int err = clEnqueueNDRangeKernel(
|
||||||
|
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
|
||||||
|
0, nullptr, eventPtr);
|
||||||
|
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to enqueue " << kernelName
|
||||||
|
<< " kernel: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up callback using static member function
|
||||||
|
err = clSetEventCallback(
|
||||||
|
*eventPtr, CL_COMPLETE, eventCallback, this);
|
||||||
|
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set event callback: " << err
|
||||||
|
<< std::endl;
|
||||||
|
clReleaseEvent(*eventPtr);
|
||||||
|
*eventPtr = nullptr;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
isRunning = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
|
|||||||
Reference in New Issue
Block a user