livoxG1:OCLEngine: compile compactor program

This commit is contained in:
2025-11-09 03:44:56 -04:00
parent 511f1796e8
commit 01ba68f2b5
3 changed files with 59 additions and 10 deletions
@@ -8,6 +8,7 @@
#include "openClCollatingAndMeshingEngine.h" #include "openClCollatingAndMeshingEngine.h"
#include "pcloudStimulusBuffer.h" #include "pcloudStimulusBuffer.h"
#include "openClKernels.h" #include "openClKernels.h"
#include "frameAssemblyDesc.h"
namespace smo { namespace smo {
namespace stim_buff { namespace stim_buff {
@@ -147,8 +148,8 @@ bool OpenClCollatingAndMeshingEngine::setup()
} }
// Create program and kernel from external source // Create program and kernel from external source
kernelSource = collateKernelStart; kernelSource = slotCompactorKernelStart;
kernelSourceLen = collateKernelNBytes; kernelSourceLen = slotCompactorKernelNBytes;
program = clCreateProgramWithSource( program = clCreateProgramWithSource(
context, 1, &kernelSource, &kernelSourceLen, &err); context, 1, &kernelSource, &kernelSourceLen, &err);
@@ -181,7 +182,7 @@ bool OpenClCollatingAndMeshingEngine::setup()
goto cleanup; goto cleanup;
} }
kernel = clCreateKernel(program, "collate", &err); kernel = clCreateKernel(program, "slotCompactor", &err);
if (err != CL_SUCCESS || !kernel) if (err != CL_SUCCESS || !kernel)
{ {
std::cerr << __func__ << ": failed to create kernel: " std::cerr << __func__ << ": failed to create kernel: "
@@ -287,7 +288,8 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
void OpenClCollatingAndMeshingEngine::start( void OpenClCollatingAndMeshingEngine::start(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback) collateFrameReqCbFn callback,
uint32_t nSucceeded)
{ {
if (!isSetup) if (!isSetup)
{ {
@@ -322,7 +324,22 @@ void OpenClCollatingAndMeshingEngine::start(
// Store the caller's callback // Store the caller's callback
collateFrameReqCb = callback; collateFrameReqCb = callback;
// Set kernel arguments // Get FrameAssemblyDesc from assembly buffer
std::shared_ptr<FrameAssemblyDesc> frameDesc =
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
if (!frameDesc || frameDesc->slots.empty())
{
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
return;
}
// Extract parameters for slotCompactor kernel
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes);
// Set kernel arguments for slotCompactor
cl_int err; cl_int err;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
@@ -331,17 +348,44 @@ void OpenClCollatingAndMeshingEngine::start(
return; return;
} }
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clCollationBuffer); err = clSetKernelArg(kernel, 1, sizeof(uint32_t), &numSlots);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl; std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
return; return;
} }
// TODO: Set additional kernel arguments as needed (e.g., buffer sizes, metadata) err = clSetKernelArg(kernel, 2, sizeof(uint32_t), &slotStride);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl;
return;
}
// Enqueue kernel execution err = clSetKernelArg(kernel, 3, sizeof(uint32_t), &slotSize);
size_t globalWorkSize = 1; // TODO: Calculate appropriate work size if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl;
return;
}
err = clSetKernelArg(kernel, 4, sizeof(uint32_t), &firstSlotOffset);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl;
return;
}
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
err = clSetKernelArg(kernel, 5, sizeof(uint32_t), &nSucceededUint);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl;
return;
}
// Enqueue kernel execution (single work item for sequential processing)
size_t globalWorkSize = 1;
err = clEnqueueNDRangeKernel( err = clEnqueueNDRangeKernel(
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr, commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
0, nullptr, &currentKernelEvent); 0, nullptr, &currentKernelEvent);
@@ -39,7 +39,8 @@ public:
void start( void start(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff, StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
collateFrameReqCbFn callback); collateFrameReqCbFn callback,
uint32_t nSucceeded);
void stop(); void stop();
private: private:
+4
View File
@@ -163,8 +163,12 @@ private:
// Layout/invariants // Layout/invariants
size_t nDgramsPerFrame; size_t nDgramsPerFrame;
public:
size_t slotStrideNBytes; size_t slotStrideNBytes;
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
private:
IOEngineConstraints inputConstraints; IOEngineConstraints inputConstraints;
// Descriptor (computed once; reused across frames) // Descriptor (computed once; reused across frames)