livoxG1:OClCollMeshEngn: Wrestling and massaging
This commit is contained in:
@@ -30,7 +30,8 @@ currentKernelEvent(nullptr),
|
|||||||
assemblyBufferPtr(nullptr),
|
assemblyBufferPtr(nullptr),
|
||||||
assemblyBufferSize(0),
|
assemblyBufferSize(0),
|
||||||
collationBufferPtr(nullptr),
|
collationBufferPtr(nullptr),
|
||||||
collationBufferSize(0)
|
collationBufferSize(0),
|
||||||
|
frameAssemblyDesc(nullptr)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,6 +101,16 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
collationBufferPtr = collationIov.iov_base;
|
collationBufferPtr = collationIov.iov_base;
|
||||||
collationBufferSize = collationIov.iov_len;
|
collationBufferSize = collationIov.iov_len;
|
||||||
|
|
||||||
|
// Get FrameAssemblyDesc from assembly buffer
|
||||||
|
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
|
||||||
|
parent.assemblyBuffer);
|
||||||
|
|
||||||
|
if (!frameAssemblyDesc || frameAssemblyDesc->slots.empty())
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
// Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy
|
// Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy
|
||||||
clAssemblyBuffer = clCreateBuffer(
|
clAssemblyBuffer = clCreateBuffer(
|
||||||
context,
|
context,
|
||||||
@@ -206,6 +217,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
assemblyBufferSize = 0;
|
assemblyBufferSize = 0;
|
||||||
collationBufferPtr = nullptr;
|
collationBufferPtr = nullptr;
|
||||||
collationBufferSize = 0;
|
collationBufferSize = 0;
|
||||||
|
frameAssemblyDesc = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Static callback for OpenCL event
|
// Static callback for OpenCL event
|
||||||
@@ -226,7 +238,7 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::start(
|
bool OpenClCollatingAndMeshingEngine::start(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback,
|
collateFrameReqCbFn callback,
|
||||||
uint32_t nSucceeded)
|
uint32_t nSucceeded)
|
||||||
@@ -234,14 +246,14 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
if (!isSetup)
|
if (!isSetup)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": engine not set up" << std::endl;
|
std::cerr << __func__ << ": engine not set up" << std::endl;
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isRunning)
|
if (isRunning)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": already running, call stop() first"
|
std::cerr << __func__ << ": already running, call stop() first"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate buffers match what we set up
|
// Validate buffers match what we set up
|
||||||
@@ -249,38 +261,24 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
struct iovec collationIov = collationBuff.getClEngineIovec();
|
struct iovec collationIov = collationBuff.getClEngineIovec();
|
||||||
|
|
||||||
if (assemblyIov.iov_base != assemblyBufferPtr
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
||||||
|| assemblyIov.iov_len != assemblyBufferSize)
|
|| assemblyIov.iov_len != assemblyBufferSize
|
||||||
{
|
|| collationIov.iov_base != collationBufferPtr
|
||||||
std::cerr << __func__ << ": assembly buffer mismatch" << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (collationIov.iov_base != collationBufferPtr
|
|
||||||
|| collationIov.iov_len != collationBufferSize)
|
|| collationIov.iov_len != collationBufferSize)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": collation buffer mismatch" << std::endl;
|
throw std::runtime_error(
|
||||||
return;
|
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
collateFrameReqCb = callback;
|
collateFrameReqCb = callback;
|
||||||
|
|
||||||
// Get FrameAssemblyDesc from assembly buffer
|
|
||||||
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
|
||||||
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
|
||||||
if (!frameDesc || frameDesc->slots.empty())
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set up kernel arguments for slotCompactor
|
// Set up kernel arguments for slotCompactor
|
||||||
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
// Set up kernel arguments for collateDgrams
|
// Set up kernel arguments for collateDgrams
|
||||||
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
||||||
@@ -293,7 +291,7 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
|
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: "
|
||||||
<< err << std::endl;
|
<< err << std::endl;
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set up callback using static member function
|
// Set up callback using static member function
|
||||||
@@ -310,7 +308,7 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
clReleaseEvent(currentKernelEvent);
|
clReleaseEvent(currentKernelEvent);
|
||||||
currentKernelEvent = nullptr;
|
currentKernelEvent = nullptr;
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Set up timeout timer in continuation class
|
// TODO: Set up timeout timer in continuation class
|
||||||
@@ -319,6 +317,7 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
isRunning = true;
|
isRunning = true;
|
||||||
// start() is synchronous - it returns immediately after setting up kernel execution
|
// start() is synchronous - it returns immediately after setting up kernel execution
|
||||||
// The callback will be invoked when the kernel completes
|
// The callback will be invoked when the kernel completes
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
||||||
@@ -398,19 +397,10 @@ bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernels()
|
|||||||
bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||||
StagingBuffer& assemblyBuff, uint32_t nSucceeded)
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded)
|
||||||
{
|
{
|
||||||
// Get FrameAssemblyDesc from assembly buffer
|
|
||||||
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
|
||||||
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
|
||||||
if (!frameDesc || frameDesc->slots.empty())
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract parameters for slotCompactor kernel
|
// Extract parameters for slotCompactor kernel
|
||||||
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
|
uint32_t numSlots = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
|
uint32_t slotSize = static_cast<uint32_t>(frameAssemblyDesc->slotSizeBytes);
|
||||||
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
||||||
assemblyBuff.firstSlotOffsetNBytes);
|
assemblyBuff.firstSlotOffsetNBytes);
|
||||||
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
||||||
@@ -477,15 +467,6 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
|||||||
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||||
StagingBuffer& assemblyBuff)
|
StagingBuffer& assemblyBuff)
|
||||||
{
|
{
|
||||||
// Get FrameAssemblyDesc from assembly buffer
|
|
||||||
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
|
||||||
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
|
||||||
if (!frameDesc || frameDesc->slots.empty())
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract parameters for collateDgrams kernel
|
// Extract parameters for collateDgrams kernel
|
||||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
uint32_t firstSlotOffset = static_cast<uint32_t>(
|
||||||
@@ -500,7 +481,8 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|||||||
int returnMode = static_cast<int>(parent.device->currentReturnMode);
|
int returnMode = static_cast<int>(parent.device->currentReturnMode);
|
||||||
uint32_t nPointsPerSlot = static_cast<uint32_t>(
|
uint32_t nPointsPerSlot = static_cast<uint32_t>(
|
||||||
IoUringAssemblyEngine::computePointsPerDgram(returnMode));
|
IoUringAssemblyEngine::computePointsPerDgram(returnMode));
|
||||||
uint32_t nDgramsPerFrame = static_cast<uint32_t>(frameDesc->numSlots);
|
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
|
||||||
|
frameAssemblyDesc->numSlots);
|
||||||
|
|
||||||
// Set kernel arguments for collateDgrams
|
// Set kernel arguments for collateDgrams
|
||||||
cl_int err;
|
cl_int err;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#define CL_TARGET_OPENCL_VERSION 300
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include "stagingBuffer.h"
|
#include "stagingBuffer.h"
|
||||||
|
#include "frameAssemblyDesc.h"
|
||||||
|
|
||||||
namespace smo {
|
namespace smo {
|
||||||
namespace stim_buff {
|
namespace stim_buff {
|
||||||
@@ -37,7 +38,7 @@ public:
|
|||||||
// Callback function type for collateFrameReq
|
// Callback function type for collateFrameReq
|
||||||
typedef std::function<void()> collateFrameReqCbFn;
|
typedef std::function<void()> collateFrameReqCbFn;
|
||||||
|
|
||||||
void start(
|
bool start(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback,
|
collateFrameReqCbFn callback,
|
||||||
uint32_t nSucceeded);
|
uint32_t nSucceeded);
|
||||||
@@ -71,6 +72,9 @@ private:
|
|||||||
void* collationBufferPtr;
|
void* collationBufferPtr;
|
||||||
size_t collationBufferSize;
|
size_t collationBufferSize;
|
||||||
|
|
||||||
|
// Frame descriptor (cached from setup)
|
||||||
|
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
|
||||||
|
|
||||||
// Callback storage
|
// Callback storage
|
||||||
collateFrameReqCbFn collateFrameReqCb;
|
collateFrameReqCbFn collateFrameReqCb;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user