livoxG1:OCLEngine: compile compactor program
This commit is contained in:
@@ -8,6 +8,7 @@
|
|||||||
#include "openClCollatingAndMeshingEngine.h"
|
#include "openClCollatingAndMeshingEngine.h"
|
||||||
#include "pcloudStimulusBuffer.h"
|
#include "pcloudStimulusBuffer.h"
|
||||||
#include "openClKernels.h"
|
#include "openClKernels.h"
|
||||||
|
#include "frameAssemblyDesc.h"
|
||||||
|
|
||||||
namespace smo {
|
namespace smo {
|
||||||
namespace stim_buff {
|
namespace stim_buff {
|
||||||
@@ -147,8 +148,8 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create program and kernel from external source
|
// Create program and kernel from external source
|
||||||
kernelSource = collateKernelStart;
|
kernelSource = slotCompactorKernelStart;
|
||||||
kernelSourceLen = collateKernelNBytes;
|
kernelSourceLen = slotCompactorKernelNBytes;
|
||||||
program = clCreateProgramWithSource(
|
program = clCreateProgramWithSource(
|
||||||
context, 1, &kernelSource, &kernelSourceLen, &err);
|
context, 1, &kernelSource, &kernelSourceLen, &err);
|
||||||
|
|
||||||
@@ -181,7 +182,7 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel = clCreateKernel(program, "collate", &err);
|
kernel = clCreateKernel(program, "slotCompactor", &err);
|
||||||
if (err != CL_SUCCESS || !kernel)
|
if (err != CL_SUCCESS || !kernel)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to create kernel: "
|
std::cerr << __func__ << ": failed to create kernel: "
|
||||||
@@ -287,7 +288,8 @@ void CL_CALLBACK OpenClCollatingAndMeshingEngine::kernelEventCallback(
|
|||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::start(
|
void OpenClCollatingAndMeshingEngine::start(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback)
|
collateFrameReqCbFn callback,
|
||||||
|
uint32_t nSucceeded)
|
||||||
{
|
{
|
||||||
if (!isSetup)
|
if (!isSetup)
|
||||||
{
|
{
|
||||||
@@ -322,7 +324,22 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
collateFrameReqCb = callback;
|
collateFrameReqCb = callback;
|
||||||
|
|
||||||
// Set kernel arguments
|
// Get FrameAssemblyDesc from assembly buffer
|
||||||
|
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
||||||
|
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
||||||
|
if (!frameDesc || frameDesc->slots.empty())
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract parameters for slotCompactor kernel
|
||||||
|
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
|
||||||
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
|
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
|
||||||
|
uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes);
|
||||||
|
|
||||||
|
// Set kernel arguments for slotCompactor
|
||||||
cl_int err;
|
cl_int err;
|
||||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
@@ -331,17 +348,44 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clCollationBuffer);
|
err = clSetKernelArg(kernel, 1, sizeof(uint32_t), &numSlots);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
|
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Set additional kernel arguments as needed (e.g., buffer sizes, metadata)
|
err = clSetKernelArg(kernel, 2, sizeof(uint32_t), &slotStride);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Enqueue kernel execution
|
err = clSetKernelArg(kernel, 3, sizeof(uint32_t), &slotSize);
|
||||||
size_t globalWorkSize = 1; // TODO: Calculate appropriate work size
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(kernel, 4, sizeof(uint32_t), &firstSlotOffset);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
||||||
|
err = clSetKernelArg(kernel, 5, sizeof(uint32_t), &nSucceededUint);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue kernel execution (single work item for sequential processing)
|
||||||
|
size_t globalWorkSize = 1;
|
||||||
err = clEnqueueNDRangeKernel(
|
err = clEnqueueNDRangeKernel(
|
||||||
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
|
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
|
||||||
0, nullptr, &currentKernelEvent);
|
0, nullptr, &currentKernelEvent);
|
||||||
|
|||||||
@@ -39,7 +39,8 @@ public:
|
|||||||
|
|
||||||
void start(
|
void start(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
collateFrameReqCbFn callback);
|
collateFrameReqCbFn callback,
|
||||||
|
uint32_t nSucceeded);
|
||||||
void stop();
|
void stop();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -163,8 +163,12 @@ private:
|
|||||||
|
|
||||||
// Layout/invariants
|
// Layout/invariants
|
||||||
size_t nDgramsPerFrame;
|
size_t nDgramsPerFrame;
|
||||||
|
|
||||||
|
public:
|
||||||
size_t slotStrideNBytes;
|
size_t slotStrideNBytes;
|
||||||
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
|
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
|
||||||
|
|
||||||
|
private:
|
||||||
IOEngineConstraints inputConstraints;
|
IOEngineConstraints inputConstraints;
|
||||||
|
|
||||||
// Descriptor (computed once; reused across frames)
|
// Descriptor (computed once; reused across frames)
|
||||||
|
|||||||
Reference in New Issue
Block a user