OClCollMeshEngn,PcloudStimProd: Produce into intensity stimbuff

PcloudStimulusBuffer::produceFrameReq():
Now correctly produces into the stim frames of the
PcloudIntensityStimulusBuffer object that's attached to the
PcloudStimulusProducer. If no intensity stimbuff is attached, the
OpenCL kernel simply does not write out the intensity data.

This is the first time we use the SP-MC ringbuffer properly,
actually cycling through the frames and producing into them
one by one.
This commit is contained in:
2025-11-23 05:54:51 -04:00
parent a025d13fce
commit 79df8b3f74
4 changed files with 175 additions and 36 deletions
@@ -165,7 +165,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Complete any running kernels
if (compactIsRunning) { compactKernelComplete(true); }
if (collateIsRunning) { collateKernelComplete(true); }
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
@@ -325,6 +325,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
collateKernelCbFn callback)
{
// Store the caller's callback
@@ -345,8 +346,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
};
// Setup args callable
auto setupArgs = [this, &assemblyBuff]() {
return setupCollateDgramsArgs(assemblyBuff);
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
};
/** EXPLANATION:
@@ -374,6 +375,28 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
unmapCollationBuffer();
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
{
StimulusFrame& intensityFrame = intensityStimFrame->get();
cl_mem intensityClBuffer = intensityFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
{
std::cerr << __func__ << ": failed to map intensity buffer"
<< std::endl;
return false;
}
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
}
}
// Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -528,7 +551,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
}
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
StagingBuffer& assemblyBuff)
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
{
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
@@ -563,7 +587,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 2, sizeof(uint32_t), &slotStride);
// Set intensity buffer argument (arg 2)
cl_mem intensityClBuffer = nullptr;
if (intensityStimFrame.has_value())
{
intensityClBuffer = intensityStimFrame->get().clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
}
err = clSetKernelArg(collateKernel, 2, sizeof(cl_mem), &intensityClBuffer);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
@@ -571,7 +602,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &nPointsPerSlot);
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
@@ -579,7 +610,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nDgramsPerFrame);
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
@@ -587,6 +618,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
<< std::endl;
return false;
}
return true;
}
@@ -611,8 +650,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
else { mapFlags = CL_MAP_READ; }
mapAssemblyBuffer(mapFlags);
unmapAssemblyBuffer();
if (mapAssemblyBuffer(mapFlags)) {
unmapAssemblyBuffer();
}
clFlush(computeDevice->commandQueue);
// Stop only compact kernel
@@ -628,7 +668,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
compactIsRunning = false;
}
void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
bool isFinalizing)
{
cl_map_flags mapFlags;
/** EXPLANATION:
@@ -638,8 +680,28 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
else { mapFlags = CL_MAP_READ; }
mapCollationBuffer(mapFlags);
unmapCollationBuffer();
if (mapCollationBuffer(mapFlags)) {
unmapCollationBuffer();
}
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
{
StimulusFrame& intensityFrame = intensityStimFrame->get();
cl_mem intensityClBuffer = intensityFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_READ, mappedIntensityBuffer))
{
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
}
}
}
clFlush(computeDevice->commandQueue);
// Stop only collate kernel
@@ -770,18 +832,21 @@ private:
OpenClCollatingAndMeshingEngine& engine;
AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
public:
CompactCollateAndMeshFrameReq(
OpenClCollatingAndMeshingEngine& engine_,
AsynchronousLoop& asyncLoop,
StimulusFrame& stimulusFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
const std::shared_ptr<ComponentThread>& caller,
Callback<compactCollateAndMeshFrameReqCbFn> cb)
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
caller, cb),
engine(engine_),
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_)
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
intensityStimFrame(intensityStimFrame_)
{}
public:
@@ -876,6 +941,7 @@ public:
bool success = engine.startCollateKernel(
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
context->intensityStimFrame,
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
@@ -884,7 +950,7 @@ public:
if (!success)
{
engine.collateKernelComplete();
engine.collateKernelComplete(context->intensityStimFrame);
callOriginalCallback(false);
return;
}
@@ -904,7 +970,16 @@ public:
return;
}
engine.collateKernelComplete();
/** EXPLANATION:
* We don't call collateKernelComplete before checking
* shouldAcceptRequests because, if shouldAcceptRequests is false, we
* shouldn't touch any of the collate cycle state -- or any state within
* the engine at all -- since finalize() may well have been called.
*
* It is therefore finalize()'s responsibility to ensure that it properly
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(context->intensityStimFrame);
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
@@ -947,6 +1022,7 @@ public:
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
@@ -960,7 +1036,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
auto caller = smoHooksPtr->ComponentThread_getSelf();
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
*this, asyncLoop, stimulusFrame,
*this, asyncLoop, stimulusFrame, intensityStimFrame,
caller,
std::move(callback));