OClCollMeshEngn,PcloudStimProd: Produce into intensity stimbuff
PcloudStimulusBuffer::produceFrameReq(): Now correctly produces into the stim frames of the PcloudIntensityStimulusBuffer object that's attached to the PcloudStimulusProducer. If there's no attached intensity stimbuff, then the OpenCL kernel will simply not write out the intensity data. This is the first time we use the SP-MC ringbuffer properly and actually cycle through the frames, producing into them one by one.
This commit is contained in:
@@ -165,7 +165,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
||||
|
||||
// Complete any running kernels
|
||||
if (compactIsRunning) { compactKernelComplete(true); }
|
||||
if (collateIsRunning) { collateKernelComplete(true); }
|
||||
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
|
||||
|
||||
// Release OpenCL buffers via smo hooks
|
||||
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
||||
@@ -325,6 +325,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
||||
|
||||
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
collateKernelCbFn callback)
|
||||
{
|
||||
// Store the caller's callback
|
||||
@@ -345,8 +346,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
};
|
||||
|
||||
// Setup args callable
|
||||
auto setupArgs = [this, &assemblyBuff]() {
|
||||
return setupCollateDgramsArgs(assemblyBuff);
|
||||
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
|
||||
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
|
||||
};
|
||||
|
||||
/** EXPLANATION:
|
||||
@@ -374,6 +375,28 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
|
||||
unmapCollationBuffer();
|
||||
|
||||
// Map/unmap intensity buffer if it exists
|
||||
if (intensityStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
||||
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (intensityClBuffer)
|
||||
{
|
||||
void* mappedIntensityBuffer = nullptr;
|
||||
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
|
||||
{
|
||||
std::cerr << __func__ << ": failed to map intensity buffer"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate global work size (just num slots in the frame)
|
||||
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||
|
||||
@@ -528,7 +551,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||
}
|
||||
|
||||
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
StagingBuffer& assemblyBuff)
|
||||
StagingBuffer& assemblyBuff,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
|
||||
{
|
||||
// Extract parameters for collateDgrams kernel
|
||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||
@@ -563,7 +587,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 2, sizeof(uint32_t), &slotStride);
|
||||
// Set intensity buffer argument (arg 2)
|
||||
cl_mem intensityClBuffer = nullptr;
|
||||
if (intensityStimFrame.has_value())
|
||||
{
|
||||
intensityClBuffer = intensityStimFrame->get().clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
}
|
||||
err = clSetKernelArg(collateKernel, 2, sizeof(cl_mem), &intensityClBuffer);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
|
||||
@@ -571,7 +602,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &nPointsPerSlot);
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
||||
@@ -579,7 +610,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
||||
@@ -587,6 +618,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -611,8 +650,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
||||
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
||||
else { mapFlags = CL_MAP_READ; }
|
||||
|
||||
mapAssemblyBuffer(mapFlags);
|
||||
unmapAssemblyBuffer();
|
||||
if (mapAssemblyBuffer(mapFlags)) {
|
||||
unmapAssemblyBuffer();
|
||||
}
|
||||
clFlush(computeDevice->commandQueue);
|
||||
|
||||
// Stop only compact kernel
|
||||
@@ -628,7 +668,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
||||
compactIsRunning = false;
|
||||
}
|
||||
|
||||
void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
|
||||
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
bool isFinalizing)
|
||||
{
|
||||
cl_map_flags mapFlags;
|
||||
/** EXPLANATION:
|
||||
@@ -638,8 +680,28 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
|
||||
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
||||
else { mapFlags = CL_MAP_READ; }
|
||||
|
||||
mapCollationBuffer(mapFlags);
|
||||
unmapCollationBuffer();
|
||||
if (mapCollationBuffer(mapFlags)) {
|
||||
unmapCollationBuffer();
|
||||
}
|
||||
|
||||
// Map/unmap intensity buffer if it exists
|
||||
if (intensityStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
||||
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (intensityClBuffer)
|
||||
{
|
||||
void* mappedIntensityBuffer = nullptr;
|
||||
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
CL_MAP_READ, mappedIntensityBuffer))
|
||||
{
|
||||
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clFlush(computeDevice->commandQueue);
|
||||
|
||||
// Stop only collate kernel
|
||||
@@ -770,18 +832,21 @@ private:
|
||||
OpenClCollatingAndMeshingEngine& engine;
|
||||
AsynchronousLoop frameAssemblyResult;
|
||||
StimulusFrame& stimulusFrame;
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
|
||||
|
||||
public:
|
||||
CompactCollateAndMeshFrameReq(
|
||||
OpenClCollatingAndMeshingEngine& engine_,
|
||||
AsynchronousLoop& asyncLoop,
|
||||
StimulusFrame& stimulusFrame_,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
|
||||
const std::shared_ptr<ComponentThread>& caller,
|
||||
Callback<compactCollateAndMeshFrameReqCbFn> cb)
|
||||
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
|
||||
caller, cb),
|
||||
engine(engine_),
|
||||
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_)
|
||||
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
|
||||
intensityStimFrame(intensityStimFrame_)
|
||||
{}
|
||||
|
||||
public:
|
||||
@@ -876,6 +941,7 @@ public:
|
||||
|
||||
bool success = engine.startCollateKernel(
|
||||
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
||||
context->intensityStimFrame,
|
||||
std::bind(
|
||||
&CompactCollateAndMeshFrameReq
|
||||
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
|
||||
@@ -884,7 +950,7 @@ public:
|
||||
|
||||
if (!success)
|
||||
{
|
||||
engine.collateKernelComplete();
|
||||
engine.collateKernelComplete(context->intensityStimFrame);
|
||||
callOriginalCallback(false);
|
||||
return;
|
||||
}
|
||||
@@ -904,7 +970,16 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
engine.collateKernelComplete();
|
||||
/** EXPLANATION:
|
||||
* The reason we don't call collateKernelComplete before checking
|
||||
* shouldAcceptRequests is because if shouldAcceptRequests is false, then
|
||||
* we shouldn't touch any of the collate cycle state...or any state within
|
||||
* the engine at all, since finalize() may well have been called.
|
||||
*
|
||||
* Therefore it's finalize()'s responsibility to ensure that it properly
|
||||
* completes/cleans up any in-flight operations.
|
||||
*/
|
||||
engine.collateKernelComplete(context->intensityStimFrame);
|
||||
// Record collate kernel end time
|
||||
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
@@ -947,6 +1022,7 @@ public:
|
||||
|
||||
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
Callback<compactCollateAndMeshFrameReqCbFn> callback)
|
||||
{
|
||||
{
|
||||
@@ -960,7 +1036,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||
|
||||
auto caller = smoHooksPtr->ComponentThread_getSelf();
|
||||
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
|
||||
*this, asyncLoop, stimulusFrame,
|
||||
*this, asyncLoop, stimulusFrame, intensityStimFrame,
|
||||
caller,
|
||||
std::move(callback));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user