livoxG1:OClCollMeshEngn: Wrestling and massaging

This commit is contained in:
2025-11-09 15:18:53 -04:00
parent c8cbaed3b1
commit 683e107b04
2 changed files with 34 additions and 48 deletions
@@ -30,7 +30,8 @@ currentKernelEvent(nullptr),
assemblyBufferPtr(nullptr),
assemblyBufferSize(0),
collationBufferPtr(nullptr),
collationBufferSize(0)
collationBufferSize(0),
frameAssemblyDesc(nullptr)
{
}
@@ -100,6 +101,16 @@ bool OpenClCollatingAndMeshingEngine::setup()
collationBufferPtr = collationIov.iov_base;
collationBufferSize = collationIov.iov_len;
// Get FrameAssemblyDesc from assembly buffer
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
parent.assemblyBuffer);
if (!frameAssemblyDesc || frameAssemblyDesc->slots.empty())
{
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
goto cleanup;
}
// Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy
clAssemblyBuffer = clCreateBuffer(
context,
@@ -206,6 +217,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
assemblyBufferSize = 0;
collationBufferPtr = nullptr;
collationBufferSize = 0;
frameAssemblyDesc = nullptr;
}
// Static callback for OpenCL event
@@ -226,7 +238,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
}
}
void OpenClCollatingAndMeshingEngine::start(
bool OpenClCollatingAndMeshingEngine::start(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback,
uint32_t nSucceeded)
@@ -234,14 +246,14 @@ void OpenClCollatingAndMeshingEngine::start(
if (!isSetup)
{
std::cerr << __func__ << ": engine not set up" << std::endl;
return;
return false;
}
if (isRunning)
{
std::cerr << __func__ << ": already running, call stop() first"
<< std::endl;
return;
return false;
}
// Validate buffers match what we set up
@@ -249,38 +261,24 @@ void OpenClCollatingAndMeshingEngine::start(
struct iovec collationIov = collationBuff.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize)
{
std::cerr << __func__ << ": assembly buffer mismatch" << std::endl;
return;
}
if (collationIov.iov_base != collationBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize)
{
std::cerr << __func__ << ": collation buffer mismatch" << std::endl;
return;
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
}
// Store the caller's callback
collateFrameReqCb = callback;
// Get FrameAssemblyDesc from assembly buffer
std::shared_ptr<FrameAssemblyDesc> frameDesc =
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
if (!frameDesc || frameDesc->slots.empty())
{
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
return;
}
// Set up kernel arguments for slotCompactor
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
return;
return false;
}
// Set up kernel arguments for collateDgrams
if (!setupCollateDgramsArgs(assemblyBuff)) {
return;
return false;
}
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
@@ -293,7 +291,7 @@ void OpenClCollatingAndMeshingEngine::start(
{
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
<< err << std::endl;
return;
return false;
}
// Set up callback using static member function
@@ -310,7 +308,7 @@ void OpenClCollatingAndMeshingEngine::start(
<< std::endl;
clReleaseEvent(currentKernelEvent);
currentKernelEvent = nullptr;
return;
return false;
}
// TODO: Set up timeout timer in continuation class
@@ -319,6 +317,7 @@ void OpenClCollatingAndMeshingEngine::start(
isRunning = true;
// start() is a synchronous call that does not wait for the kernel to finish -
// it returns immediately after setting up kernel execution
// The callback will be invoked when the kernel completes
return true;
}
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
@@ -398,19 +397,10 @@ bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernels()
bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
StagingBuffer& assemblyBuff, uint32_t nSucceeded)
{
// Get FrameAssemblyDesc from assembly buffer
std::shared_ptr<FrameAssemblyDesc> frameDesc =
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
if (!frameDesc || frameDesc->slots.empty())
{
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
return false;
}
// Extract parameters for slotCompactor kernel
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
uint32_t numSlots = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
uint32_t slotSize = static_cast<uint32_t>(frameAssemblyDesc->slotSizeBytes);
uint32_t firstSlotOffset = static_cast<uint32_t>(
assemblyBuff.firstSlotOffsetNBytes);
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
@@ -477,15 +467,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
StagingBuffer& assemblyBuff)
{
// Get FrameAssemblyDesc from assembly buffer
std::shared_ptr<FrameAssemblyDesc> frameDesc =
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
if (!frameDesc || frameDesc->slots.empty())
{
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
return false;
}
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
uint32_t firstSlotOffset = static_cast<uint32_t>(
@@ -500,7 +481,8 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
int returnMode = static_cast<int>(parent.device->currentReturnMode);
uint32_t nPointsPerSlot = static_cast<uint32_t>(
IoUringAssemblyEngine::computePointsPerDgram(returnMode));
uint32_t nDgramsPerFrame = static_cast<uint32_t>(frameDesc->numSlots);
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
frameAssemblyDesc->numSlots);
// Set kernel arguments for collateDgrams
cl_int err;
@@ -9,6 +9,7 @@
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include "stagingBuffer.h"
#include "frameAssemblyDesc.h"
namespace smo {
namespace stim_buff {
@@ -37,7 +38,7 @@ public:
// Callback function type for collateFrameReq
typedef std::function<void()> collateFrameReqCbFn;
void start(
bool start(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback,
uint32_t nSucceeded);
@@ -71,6 +72,9 @@ private:
void* collationBufferPtr;
size_t collationBufferSize;
// Frame descriptor (cached from setup)
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
// Callback storage
collateFrameReqCbFn collateFrameReqCb;