OClCollatingMeshingEngn: Compile both kernels side by side
This commit is contained in:
@@ -3,12 +3,12 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <boost/system/error_code.hpp>
|
#include <boost/system/error_code.hpp>
|
||||||
#include "openClCollatingAndMeshingEngine.h"
|
#include "openClCollatingAndMeshingEngine.h"
|
||||||
#include "pcloudStimulusBuffer.h"
|
#include "pcloudStimulusBuffer.h"
|
||||||
#include "openClKernels.h"
|
#include "openClKernels.h"
|
||||||
#include "frameAssemblyDesc.h"
|
#include "frameAssemblyDesc.h"
|
||||||
|
#include "ioUringAssemblyEngine.h"
|
||||||
|
|
||||||
namespace smo {
|
namespace smo {
|
||||||
namespace stim_buff {
|
namespace stim_buff {
|
||||||
@@ -19,14 +19,13 @@ platform(nullptr),
|
|||||||
device(nullptr),
|
device(nullptr),
|
||||||
context(nullptr),
|
context(nullptr),
|
||||||
commandQueue(nullptr),
|
commandQueue(nullptr),
|
||||||
program(nullptr),
|
slotCompactorProgram(nullptr), collateProgram(nullptr),
|
||||||
kernel(nullptr),
|
slotCompactorKernel(nullptr), collateKernel(nullptr),
|
||||||
isSetup(false),
|
isSetup(false),
|
||||||
clAssemblyBuffer(nullptr),
|
clAssemblyBuffer(nullptr),
|
||||||
clCollationBuffer(nullptr),
|
clCollationBuffer(nullptr),
|
||||||
isRunning(false),
|
isRunning(false),
|
||||||
currentKernelEvent(nullptr),
|
currentKernelEvent(nullptr),
|
||||||
memoryPinned(false),
|
|
||||||
assemblyBufferPtr(nullptr),
|
assemblyBufferPtr(nullptr),
|
||||||
assemblyBufferSize(0),
|
assemblyBufferSize(0),
|
||||||
collationBufferPtr(nullptr),
|
collationBufferPtr(nullptr),
|
||||||
@@ -90,8 +89,6 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
// Declare variables early to avoid goto crossing initialization
|
// Declare variables early to avoid goto crossing initialization
|
||||||
struct iovec assemblyIov;
|
struct iovec assemblyIov;
|
||||||
struct iovec collationIov;
|
struct iovec collationIov;
|
||||||
const char* kernelSource;
|
|
||||||
size_t kernelSourceLen;
|
|
||||||
|
|
||||||
// Get StagingBuffer memory pointers from parent
|
// Get StagingBuffer memory pointers from parent
|
||||||
assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
||||||
@@ -102,24 +99,6 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
collationBufferPtr = collationIov.iov_base;
|
collationBufferPtr = collationIov.iov_base;
|
||||||
collationBufferSize = collationIov.iov_len;
|
collationBufferSize = collationIov.iov_len;
|
||||||
|
|
||||||
// Pin memory pages using mlock()
|
|
||||||
if (mlock(assemblyBufferPtr, assemblyBufferSize) != 0)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to pin assembly buffer memory: "
|
|
||||||
<< strerror(errno) << std::endl;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mlock(collationBufferPtr, collationBufferSize) != 0)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to pin collation buffer memory: "
|
|
||||||
<< strerror(errno) << std::endl;
|
|
||||||
munlock(assemblyBufferPtr, assemblyBufferSize);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
memoryPinned = true;
|
|
||||||
|
|
||||||
// Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy
|
// Create OpenCL buffers using CL_MEM_USE_HOST_PTR for zero-copy
|
||||||
clAssemblyBuffer = clCreateBuffer(
|
clAssemblyBuffer = clCreateBuffer(
|
||||||
context,
|
context,
|
||||||
@@ -147,46 +126,9 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create program and kernel from external source
|
// Compile and prepare both kernels
|
||||||
kernelSource = slotCompactorKernelStart;
|
if (!compileAndPrepareKernels())
|
||||||
kernelSourceLen = slotCompactorKernelNBytes;
|
|
||||||
program = clCreateProgramWithSource(
|
|
||||||
context, 1, &kernelSource, &kernelSourceLen, &err);
|
|
||||||
|
|
||||||
if (err != CL_SUCCESS || !program)
|
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to create program: "
|
|
||||||
<< err << std::endl;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to build program: "
|
|
||||||
<< err << std::endl;
|
|
||||||
|
|
||||||
// Print build log if available
|
|
||||||
size_t logSize = 0;
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
0, nullptr, &logSize);
|
|
||||||
|
|
||||||
if (logSize > 0)
|
|
||||||
{
|
|
||||||
std::vector<char> log(logSize);
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
logSize, log.data(), nullptr);
|
|
||||||
std::cerr << "Build log: " << log.data() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel = clCreateKernel(program, "slotCompactor", &err);
|
|
||||||
if (err != CL_SUCCESS || !kernel)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to create kernel: "
|
|
||||||
<< err << std::endl;
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -203,19 +145,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
// Call stop() first
|
// Call stop() first
|
||||||
stop();
|
stop();
|
||||||
|
|
||||||
// Unpin memory pages if they were pinned
|
|
||||||
if (memoryPinned)
|
|
||||||
{
|
|
||||||
if (collationBufferPtr && collationBufferSize > 0) {
|
|
||||||
munlock(collationBufferPtr, collationBufferSize);
|
|
||||||
}
|
|
||||||
if (assemblyBufferPtr && assemblyBufferSize > 0) {
|
|
||||||
munlock(assemblyBufferPtr, assemblyBufferSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
memoryPinned = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Release OpenCL buffers in reverse order
|
// Release OpenCL buffers in reverse order
|
||||||
if (clCollationBuffer)
|
if (clCollationBuffer)
|
||||||
{
|
{
|
||||||
@@ -228,18 +157,28 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
clAssemblyBuffer = nullptr;
|
clAssemblyBuffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release kernel
|
// Release kernels
|
||||||
if (kernel)
|
if (slotCompactorKernel)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(slotCompactorKernel);
|
||||||
kernel = nullptr;
|
slotCompactorKernel = nullptr;
|
||||||
|
}
|
||||||
|
if (collateKernel)
|
||||||
|
{
|
||||||
|
clReleaseKernel(collateKernel);
|
||||||
|
collateKernel = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release program
|
// Release programs
|
||||||
if (program)
|
if (slotCompactorProgram)
|
||||||
{
|
{
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(slotCompactorProgram);
|
||||||
program = nullptr;
|
slotCompactorProgram = nullptr;
|
||||||
|
}
|
||||||
|
if (collateProgram)
|
||||||
|
{
|
||||||
|
clReleaseProgram(collateProgram);
|
||||||
|
collateProgram = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release command queue
|
// Release command queue
|
||||||
@@ -333,66 +272,24 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract parameters for slotCompactor kernel
|
// Set up kernel arguments for slotCompactor
|
||||||
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
|
if (!setupSlotCompactorsArgs(assemblyBuff, nSucceeded)) {
|
||||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
return;
|
||||||
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
|
}
|
||||||
uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes);
|
// Set up kernel arguments for collateDgrams
|
||||||
|
if (!setupCollateDgramsArgs(assemblyBuff)) {
|
||||||
// Set kernel arguments for slotCompactor
|
|
||||||
cl_int err;
|
|
||||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 0: " << err << std::endl;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = clSetKernelArg(kernel, 1, sizeof(uint32_t), &numSlots);
|
// Enqueue slotCompactor kernel execution (single work item for sequential processing)
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = clSetKernelArg(kernel, 2, sizeof(uint32_t), &slotStride);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = clSetKernelArg(kernel, 3, sizeof(uint32_t), &slotSize);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = clSetKernelArg(kernel, 4, sizeof(uint32_t), &firstSlotOffset);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
|
||||||
err = clSetKernelArg(kernel, 5, sizeof(uint32_t), &nSucceededUint);
|
|
||||||
if (err != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enqueue kernel execution (single work item for sequential processing)
|
|
||||||
size_t globalWorkSize = 1;
|
size_t globalWorkSize = 1;
|
||||||
err = clEnqueueNDRangeKernel(
|
cl_int err = clEnqueueNDRangeKernel(
|
||||||
commandQueue, kernel, 1, nullptr, &globalWorkSize, nullptr,
|
commandQueue, slotCompactorKernel, 1, nullptr, &globalWorkSize, nullptr,
|
||||||
0, nullptr, ¤tKernelEvent);
|
0, nullptr, ¤tKernelEvent);
|
||||||
|
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to enqueue kernel: " << err << std::endl;
|
std::cerr << __func__ << ": failed to enqueue slotCompactor kernel: " << err << std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -420,6 +317,219 @@ void OpenClCollatingAndMeshingEngine::start(
|
|||||||
// The callback will be invoked when the kernel completes
|
// The callback will be invoked when the kernel completes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
||||||
|
const char* kernelSource, size_t kernelSourceLen,
|
||||||
|
const char* kernelName, cl_program& program, cl_kernel& kernel)
|
||||||
|
{
|
||||||
|
cl_int err;
|
||||||
|
|
||||||
|
// Create program from source
|
||||||
|
program = clCreateProgramWithSource(
|
||||||
|
context, 1, &kernelSource, &kernelSourceLen, &err);
|
||||||
|
|
||||||
|
if (err != CL_SUCCESS || !program)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to create " << kernelName
|
||||||
|
<< " program: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build program
|
||||||
|
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to build " << kernelName
|
||||||
|
<< " program: " << err << std::endl;
|
||||||
|
|
||||||
|
// Print build log if available
|
||||||
|
size_t logSize = 0;
|
||||||
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
||||||
|
0, nullptr, &logSize);
|
||||||
|
|
||||||
|
if (logSize > 0)
|
||||||
|
{
|
||||||
|
std::vector<char> log(logSize);
|
||||||
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
||||||
|
logSize, log.data(), nullptr);
|
||||||
|
std::cerr << kernelName << " build log: " << log.data() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create kernel
|
||||||
|
kernel = clCreateKernel(program, kernelName, &err);
|
||||||
|
if (err != CL_SUCCESS || !kernel)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to create " << kernelName
|
||||||
|
<< " kernel: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernels()
|
||||||
|
{
|
||||||
|
// Compile slotCompactor kernel
|
||||||
|
if (!compileAndPrepareKernel(
|
||||||
|
slotCompactorKernelStart, slotCompactorKernelNBytes,
|
||||||
|
"slotCompactor", slotCompactorProgram, slotCompactorKernel))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile collateDgrams kernel
|
||||||
|
if (!compileAndPrepareKernel(
|
||||||
|
collateKernelStart, collateKernelNBytes,
|
||||||
|
"collate", collateProgram, collateKernel))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||||
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded)
|
||||||
|
{
|
||||||
|
// Get FrameAssemblyDesc from assembly buffer
|
||||||
|
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
||||||
|
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
||||||
|
if (!frameDesc || frameDesc->slots.empty())
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract parameters for slotCompactor kernel
|
||||||
|
uint32_t numSlots = static_cast<uint32_t>(frameDesc->numSlots);
|
||||||
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
|
uint32_t slotSize = static_cast<uint32_t>(frameDesc->slotSizeBytes);
|
||||||
|
uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes);
|
||||||
|
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
||||||
|
|
||||||
|
// Set kernel arguments for slotCompactor
|
||||||
|
cl_int err;
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 0: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 1, sizeof(uint32_t), &numSlots);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 2, sizeof(uint32_t), &slotStride);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 3, sizeof(uint32_t), &slotSize);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 4, sizeof(uint32_t), &firstSlotOffset);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(slotCompactorKernel, 5, sizeof(uint32_t), &nSucceededUint);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||||
|
StagingBuffer& assemblyBuff)
|
||||||
|
{
|
||||||
|
// Get FrameAssemblyDesc from assembly buffer
|
||||||
|
std::shared_ptr<FrameAssemblyDesc> frameDesc =
|
||||||
|
static_cast<std::shared_ptr<FrameAssemblyDesc>>(assemblyBuff);
|
||||||
|
if (!frameDesc || frameDesc->slots.empty())
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract parameters for collateDgrams kernel
|
||||||
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
|
uint32_t firstSlotOffset = static_cast<uint32_t>(assemblyBuff.firstSlotOffsetNBytes);
|
||||||
|
|
||||||
|
// Calculate nPointsPerSlot from device return mode
|
||||||
|
if (!parent.device)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": device not available" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int returnMode = static_cast<int>(parent.device->currentReturnMode);
|
||||||
|
uint32_t nPointsPerSlot = static_cast<uint32_t>(
|
||||||
|
IoUringAssemblyEngine::computePointsPerDgram(returnMode));
|
||||||
|
uint32_t nDgramsPerFrame = static_cast<uint32_t>(frameDesc->numSlots);
|
||||||
|
|
||||||
|
// Set kernel arguments for collateDgrams
|
||||||
|
cl_int err;
|
||||||
|
err = clSetKernelArg(collateKernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 0: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 1, sizeof(cl_mem), &clCollationBuffer);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 1: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 2, sizeof(uint32_t), &slotStride);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &firstSlotOffset);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 5: " << err << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::stop()
|
void OpenClCollatingAndMeshingEngine::stop()
|
||||||
{
|
{
|
||||||
if (!isRunning) {
|
if (!isRunning) {
|
||||||
|
|||||||
@@ -51,8 +51,10 @@ private:
|
|||||||
cl_device_id device;
|
cl_device_id device;
|
||||||
cl_context context;
|
cl_context context;
|
||||||
cl_command_queue commandQueue;
|
cl_command_queue commandQueue;
|
||||||
cl_program program;
|
cl_program slotCompactorProgram;
|
||||||
cl_kernel kernel;
|
cl_program collateProgram;
|
||||||
|
cl_kernel slotCompactorKernel;
|
||||||
|
cl_kernel collateKernel;
|
||||||
bool isSetup;
|
bool isSetup;
|
||||||
|
|
||||||
// OpenCL buffers
|
// OpenCL buffers
|
||||||
@@ -62,9 +64,8 @@ private:
|
|||||||
// State tracking
|
// State tracking
|
||||||
bool isRunning;
|
bool isRunning;
|
||||||
cl_event currentKernelEvent;
|
cl_event currentKernelEvent;
|
||||||
bool memoryPinned;
|
|
||||||
|
|
||||||
// Memory pinning tracking
|
// Memory tracking
|
||||||
void* assemblyBufferPtr;
|
void* assemblyBufferPtr;
|
||||||
size_t assemblyBufferSize;
|
size_t assemblyBufferSize;
|
||||||
void* collationBufferPtr;
|
void* collationBufferPtr;
|
||||||
@@ -76,6 +77,15 @@ private:
|
|||||||
// Static callback for OpenCL event
|
// Static callback for OpenCL event
|
||||||
static void CL_CALLBACK kernelEventCallback(
|
static void CL_CALLBACK kernelEventCallback(
|
||||||
cl_event event, cl_int event_command_exec_status, void* user_data);
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
||||||
|
|
||||||
|
// Private helper methods
|
||||||
|
bool compileAndPrepareKernel(
|
||||||
|
const char* kernelSource, size_t kernelSourceLen,
|
||||||
|
const char* kernelName, cl_program& program, cl_kernel& kernel);
|
||||||
|
bool compileAndPrepareKernels();
|
||||||
|
bool setupSlotCompactorsArgs(
|
||||||
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
||||||
|
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
|
|||||||
Reference in New Issue
Block a user