OClCollMeshEngn,PcloudStimProd: Produce into intensity stimbuff
PcloudStimulusBuffer::produceFrameReq(): Now correctly produces into the stim frames for the PcloudIntensityStimulusBuffer object that's attached to the PcloudStimulusProducer. If there's no attached I stimbuff, then the OpenCL kernel will simply not write out the intensity data. This is the first moment when we actually use the SP-MC ringbuffer properly and actually cycle through the frames, producing into them one by one.
This commit is contained in:
@@ -24,6 +24,7 @@ inline int readInt32LE(__global uchar* ptr)
|
|||||||
__kernel void collate(
|
__kernel void collate(
|
||||||
__global uchar* assembly,
|
__global uchar* assembly,
|
||||||
__global float* collation,
|
__global float* collation,
|
||||||
|
__global float* intensityBuffer,
|
||||||
uint slotStride,
|
uint slotStride,
|
||||||
uint nPointsPerSlot,
|
uint nPointsPerSlot,
|
||||||
uint nDgramsPerFrame)
|
uint nDgramsPerFrame)
|
||||||
@@ -44,9 +45,12 @@ __kernel void collate(
|
|||||||
__global uchar* pointsArray = slotStart + 18;
|
__global uchar* pointsArray = slotStart + 18;
|
||||||
|
|
||||||
// Base offset in collation buffer for this slot (in floats)
|
// Base offset in collation buffer for this slot (in floats)
|
||||||
// Each PointXYZI is 4 floats (x, y, z, intensity)
|
// Each PointXYZ is 3 floats (x, y, z)
|
||||||
#define FLOATS_PER_POINT 4
|
#define FLOATS_PER_POINT 3
|
||||||
uint collationBaseOffset = slotIndex * nPointsPerSlot * FLOATS_PER_POINT;
|
uint collationBaseOffset = slotIndex * nPointsPerSlot * FLOATS_PER_POINT;
|
||||||
|
// Base offset in intensity buffer for this slot (in floats)
|
||||||
|
// Each intensity is 1 float
|
||||||
|
uint intensityBaseOffset = slotIndex * nPointsPerSlot;
|
||||||
DBG_PRINTF("Running kernel: about to process points in slot.\n");
|
DBG_PRINTF("Running kernel: about to process points in slot.\n");
|
||||||
|
|
||||||
// Process based on data type using nested ifs (outer) with loops (inner)
|
// Process based on data type using nested ifs (outer) with loops (inner)
|
||||||
@@ -79,12 +83,16 @@ __kernel void collate(
|
|||||||
slotIndex, i, intensity);
|
slotIndex, i, intensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write to collation buffer
|
// Write XYZ to collation buffer
|
||||||
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
||||||
collation[offset + 0] = x;
|
collation[offset + 0] = x;
|
||||||
collation[offset + 1] = y;
|
collation[offset + 1] = y;
|
||||||
collation[offset + 2] = z;
|
collation[offset + 2] = z;
|
||||||
collation[offset + 3] = intensity;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + i] = intensity;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (dataType == 2)
|
else if (dataType == 2)
|
||||||
@@ -117,12 +125,16 @@ __kernel void collate(
|
|||||||
slotIndex, i, intensity);
|
slotIndex, i, intensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write to collation buffer
|
// Write XYZ to collation buffer
|
||||||
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
uint offset = collationBaseOffset + (i * FLOATS_PER_POINT);
|
||||||
collation[offset + 0] = x;
|
collation[offset + 0] = x;
|
||||||
collation[offset + 1] = y;
|
collation[offset + 1] = y;
|
||||||
collation[offset + 2] = z;
|
collation[offset + 2] = z;
|
||||||
collation[offset + 3] = intensity;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + i] = intensity;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (dataType == 4)
|
else if (dataType == 4)
|
||||||
@@ -161,7 +173,11 @@ __kernel void collate(
|
|||||||
collation[offset1 + 0] = x1;
|
collation[offset1 + 0] = x1;
|
||||||
collation[offset1 + 1] = y1;
|
collation[offset1 + 1] = y1;
|
||||||
collation[offset1 + 2] = z1;
|
collation[offset1 + 2] = z1;
|
||||||
collation[offset1 + 3] = intensity1;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
++pointIndex;
|
++pointIndex;
|
||||||
|
|
||||||
// Process second point
|
// Process second point
|
||||||
@@ -188,7 +204,11 @@ __kernel void collate(
|
|||||||
collation[offset2 + 0] = x2;
|
collation[offset2 + 0] = x2;
|
||||||
collation[offset2 + 1] = y2;
|
collation[offset2 + 1] = y2;
|
||||||
collation[offset2 + 2] = z2;
|
collation[offset2 + 2] = z2;
|
||||||
collation[offset2 + 3] = intensity2;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
++pointIndex;
|
++pointIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -228,7 +248,11 @@ __kernel void collate(
|
|||||||
collation[offset1 + 0] = x1;
|
collation[offset1 + 0] = x1;
|
||||||
collation[offset1 + 1] = y1;
|
collation[offset1 + 1] = y1;
|
||||||
collation[offset1 + 2] = z1;
|
collation[offset1 + 2] = z1;
|
||||||
collation[offset1 + 3] = intensity1;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
++pointIndex;
|
++pointIndex;
|
||||||
|
|
||||||
// Process second point
|
// Process second point
|
||||||
@@ -255,7 +279,11 @@ __kernel void collate(
|
|||||||
collation[offset2 + 0] = x2;
|
collation[offset2 + 0] = x2;
|
||||||
collation[offset2 + 1] = y2;
|
collation[offset2 + 1] = y2;
|
||||||
collation[offset2 + 2] = z2;
|
collation[offset2 + 2] = z2;
|
||||||
collation[offset2 + 3] = intensity2;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
++pointIndex;
|
++pointIndex;
|
||||||
|
|
||||||
// Process third point
|
// Process third point
|
||||||
@@ -282,7 +310,11 @@ __kernel void collate(
|
|||||||
collation[offset3 + 0] = x3;
|
collation[offset3 + 0] = x3;
|
||||||
collation[offset3 + 1] = y3;
|
collation[offset3 + 1] = y3;
|
||||||
collation[offset3 + 2] = z3;
|
collation[offset3 + 2] = z3;
|
||||||
collation[offset3 + 3] = intensity3;
|
// Write intensity conditionally - divert to intensity buffer if attached, else discard
|
||||||
|
if (intensityBuffer != NULL) {
|
||||||
|
intensityBuffer[intensityBaseOffset + pointIndex] = intensity3;
|
||||||
|
}
|
||||||
|
// Don't write intensity to collation buffer
|
||||||
++pointIndex;
|
++pointIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -165,7 +165,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
|||||||
|
|
||||||
// Complete any running kernels
|
// Complete any running kernels
|
||||||
if (compactIsRunning) { compactKernelComplete(true); }
|
if (compactIsRunning) { compactKernelComplete(true); }
|
||||||
if (collateIsRunning) { collateKernelComplete(true); }
|
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
|
||||||
|
|
||||||
// Release OpenCL buffers via smo hooks
|
// Release OpenCL buffers via smo hooks
|
||||||
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
||||||
@@ -325,6 +325,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
|||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
collateKernelCbFn callback)
|
collateKernelCbFn callback)
|
||||||
{
|
{
|
||||||
// Store the caller's callback
|
// Store the caller's callback
|
||||||
@@ -345,8 +346,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Setup args callable
|
// Setup args callable
|
||||||
auto setupArgs = [this, &assemblyBuff]() {
|
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
|
||||||
return setupCollateDgramsArgs(assemblyBuff);
|
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
|
||||||
};
|
};
|
||||||
|
|
||||||
/** EXPLANATION:
|
/** EXPLANATION:
|
||||||
@@ -374,6 +375,28 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
|||||||
|
|
||||||
unmapCollationBuffer();
|
unmapCollationBuffer();
|
||||||
|
|
||||||
|
// Map/unmap intensity buffer if it exists
|
||||||
|
if (intensityStimFrame.has_value())
|
||||||
|
{
|
||||||
|
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
||||||
|
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
||||||
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||||
|
|
||||||
|
if (intensityClBuffer)
|
||||||
|
{
|
||||||
|
void* mappedIntensityBuffer = nullptr;
|
||||||
|
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||||
|
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to map intensity buffer"
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate global work size (just num slots in the frame)
|
// Calculate global work size (just num slots in the frame)
|
||||||
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||||
|
|
||||||
@@ -528,7 +551,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||||
StagingBuffer& assemblyBuff)
|
StagingBuffer& assemblyBuff,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
|
||||||
{
|
{
|
||||||
// Extract parameters for collateDgrams kernel
|
// Extract parameters for collateDgrams kernel
|
||||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||||
@@ -563,7 +587,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = clSetKernelArg(collateKernel, 2, sizeof(uint32_t), &slotStride);
|
// Set intensity buffer argument (arg 2)
|
||||||
|
cl_mem intensityClBuffer = nullptr;
|
||||||
|
if (intensityStimFrame.has_value())
|
||||||
|
{
|
||||||
|
intensityClBuffer = intensityStimFrame->get().clBuffer
|
||||||
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||||
|
}
|
||||||
|
err = clSetKernelArg(collateKernel, 2, sizeof(cl_mem), &intensityClBuffer);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
|
||||||
@@ -571,7 +602,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &nPointsPerSlot);
|
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
||||||
@@ -579,7 +610,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nDgramsPerFrame);
|
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
|
||||||
if (err != CL_SUCCESS)
|
if (err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
||||||
@@ -587,6 +618,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
|
||||||
|
if (err != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -611,8 +650,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
|||||||
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
||||||
else { mapFlags = CL_MAP_READ; }
|
else { mapFlags = CL_MAP_READ; }
|
||||||
|
|
||||||
mapAssemblyBuffer(mapFlags);
|
if (mapAssemblyBuffer(mapFlags)) {
|
||||||
unmapAssemblyBuffer();
|
unmapAssemblyBuffer();
|
||||||
|
}
|
||||||
clFlush(computeDevice->commandQueue);
|
clFlush(computeDevice->commandQueue);
|
||||||
|
|
||||||
// Stop only compact kernel
|
// Stop only compact kernel
|
||||||
@@ -628,7 +668,9 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
|||||||
compactIsRunning = false;
|
compactIsRunning = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
|
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
|
bool isFinalizing)
|
||||||
{
|
{
|
||||||
cl_map_flags mapFlags;
|
cl_map_flags mapFlags;
|
||||||
/** EXPLANATION:
|
/** EXPLANATION:
|
||||||
@@ -638,8 +680,28 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(bool isFinalizing)
|
|||||||
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
||||||
else { mapFlags = CL_MAP_READ; }
|
else { mapFlags = CL_MAP_READ; }
|
||||||
|
|
||||||
mapCollationBuffer(mapFlags);
|
if (mapCollationBuffer(mapFlags)) {
|
||||||
unmapCollationBuffer();
|
unmapCollationBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map/unmap intensity buffer if it exists
|
||||||
|
if (intensityStimFrame.has_value())
|
||||||
|
{
|
||||||
|
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
||||||
|
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
||||||
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||||
|
|
||||||
|
if (intensityClBuffer)
|
||||||
|
{
|
||||||
|
void* mappedIntensityBuffer = nullptr;
|
||||||
|
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||||
|
CL_MAP_READ, mappedIntensityBuffer))
|
||||||
|
{
|
||||||
|
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
clFlush(computeDevice->commandQueue);
|
clFlush(computeDevice->commandQueue);
|
||||||
|
|
||||||
// Stop only collate kernel
|
// Stop only collate kernel
|
||||||
@@ -770,18 +832,21 @@ private:
|
|||||||
OpenClCollatingAndMeshingEngine& engine;
|
OpenClCollatingAndMeshingEngine& engine;
|
||||||
AsynchronousLoop frameAssemblyResult;
|
AsynchronousLoop frameAssemblyResult;
|
||||||
StimulusFrame& stimulusFrame;
|
StimulusFrame& stimulusFrame;
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CompactCollateAndMeshFrameReq(
|
CompactCollateAndMeshFrameReq(
|
||||||
OpenClCollatingAndMeshingEngine& engine_,
|
OpenClCollatingAndMeshingEngine& engine_,
|
||||||
AsynchronousLoop& asyncLoop,
|
AsynchronousLoop& asyncLoop,
|
||||||
StimulusFrame& stimulusFrame_,
|
StimulusFrame& stimulusFrame_,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
|
||||||
const std::shared_ptr<ComponentThread>& caller,
|
const std::shared_ptr<ComponentThread>& caller,
|
||||||
Callback<compactCollateAndMeshFrameReqCbFn> cb)
|
Callback<compactCollateAndMeshFrameReqCbFn> cb)
|
||||||
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
|
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
|
||||||
caller, cb),
|
caller, cb),
|
||||||
engine(engine_),
|
engine(engine_),
|
||||||
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_)
|
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
|
||||||
|
intensityStimFrame(intensityStimFrame_)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -876,6 +941,7 @@ public:
|
|||||||
|
|
||||||
bool success = engine.startCollateKernel(
|
bool success = engine.startCollateKernel(
|
||||||
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
||||||
|
context->intensityStimFrame,
|
||||||
std::bind(
|
std::bind(
|
||||||
&CompactCollateAndMeshFrameReq
|
&CompactCollateAndMeshFrameReq
|
||||||
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
|
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
|
||||||
@@ -884,7 +950,7 @@ public:
|
|||||||
|
|
||||||
if (!success)
|
if (!success)
|
||||||
{
|
{
|
||||||
engine.collateKernelComplete();
|
engine.collateKernelComplete(context->intensityStimFrame);
|
||||||
callOriginalCallback(false);
|
callOriginalCallback(false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -904,7 +970,16 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
engine.collateKernelComplete();
|
/** EXPLANATION:
|
||||||
|
* The reason we don't call collateKernelComplete before checking
|
||||||
|
* shouldAcceptRequests is because if shouldAcceptRequests is false, then
|
||||||
|
* we shouldn't touch any of the collate cycle state...or any state within
|
||||||
|
* the engine at all, since finalize() may well have been called.
|
||||||
|
*
|
||||||
|
* Therefore it's finalize()'s responsibility to ensure that it properly
|
||||||
|
* completes/cleans up any in-flight operations.
|
||||||
|
*/
|
||||||
|
engine.collateKernelComplete(context->intensityStimFrame);
|
||||||
// Record collate kernel end time
|
// Record collate kernel end time
|
||||||
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
@@ -947,6 +1022,7 @@ public:
|
|||||||
|
|
||||||
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||||
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
Callback<compactCollateAndMeshFrameReqCbFn> callback)
|
Callback<compactCollateAndMeshFrameReqCbFn> callback)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
@@ -960,7 +1036,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
|||||||
|
|
||||||
auto caller = smoHooksPtr->ComponentThread_getSelf();
|
auto caller = smoHooksPtr->ComponentThread_getSelf();
|
||||||
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
|
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
|
||||||
*this, asyncLoop, stimulusFrame,
|
*this, asyncLoop, stimulusFrame, intensityStimFrame,
|
||||||
caller,
|
caller,
|
||||||
std::move(callback));
|
std::move(callback));
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
@@ -48,6 +49,7 @@ public:
|
|||||||
compactCollateAndMeshFrameReqCbFn;
|
compactCollateAndMeshFrameReqCbFn;
|
||||||
void compactCollateAndMeshFrameReq(
|
void compactCollateAndMeshFrameReq(
|
||||||
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -60,10 +62,13 @@ private:
|
|||||||
compactKernelCbFn callback);
|
compactKernelCbFn callback);
|
||||||
bool startCollateKernel(
|
bool startCollateKernel(
|
||||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
collateKernelCbFn callback);
|
collateKernelCbFn callback);
|
||||||
|
|
||||||
void compactKernelComplete(bool isFinalizing=false);
|
void compactKernelComplete(bool isFinalizing=false);
|
||||||
void collateKernelComplete(bool isFinalizing=false);
|
void collateKernelComplete(
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||||
|
bool isFinalizing=false);
|
||||||
bool stop();
|
bool stop();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -131,7 +136,9 @@ private:
|
|||||||
bool compileAndPrepareKernels();
|
bool compileAndPrepareKernels();
|
||||||
bool setupSlotCompactorsArgs(
|
bool setupSlotCompactorsArgs(
|
||||||
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
|
||||||
bool setupCollateDgramsArgs(StagingBuffer& assemblyBuff);
|
bool setupCollateDgramsArgs(
|
||||||
|
StagingBuffer& assemblyBuff,
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame);
|
||||||
|
|
||||||
// Generic buffer mapping/unmapping for zero-copy synchronization
|
// Generic buffer mapping/unmapping for zero-copy synchronization
|
||||||
bool mapBuffer(
|
bool mapBuffer(
|
||||||
|
|||||||
@@ -22,9 +22,9 @@ static StagingBuffer::IOEngineConstraints openClInputConstraints(
|
|||||||
* This should eventually be aligned to 4B and padded to 12B.
|
* This should eventually be aligned to 4B and padded to 12B.
|
||||||
*/
|
*/
|
||||||
// slotStartAlignmentByteVal (page alignment)
|
// slotStartAlignmentByteVal (page alignment)
|
||||||
sizeof(float) * 4,
|
sizeof(float),
|
||||||
// slotPadToNBytes (pointer size)
|
// slotPadToNBytes (XYZ = 3 floats per point)
|
||||||
sizeof(float) * 4,
|
sizeof(float) * 3,
|
||||||
// frameStartAlignmentByteVal (page alignment)
|
// frameStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
// framePadToNBytes (pointer size)
|
// framePadToNBytes (pointer size)
|
||||||
@@ -34,7 +34,7 @@ static StagingBuffer::IOEngineConstraints openClInputConstraints(
|
|||||||
static StagingBuffer::IOEngineConstraints openClMeshInputConstraints(
|
static StagingBuffer::IOEngineConstraints openClMeshInputConstraints(
|
||||||
// slotStartAlignmentByteVal (page alignment)
|
// slotStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
// slotPadToNBytes (pointer size)
|
// slotPadToNBytes: This is dynamically calculated based on the return mode.
|
||||||
sizeof(float) * 3,
|
sizeof(float) * 3,
|
||||||
// frameStartAlignmentByteVal (page alignment)
|
// frameStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
@@ -44,7 +44,7 @@ static StagingBuffer::IOEngineConstraints openClMeshInputConstraints(
|
|||||||
static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints(
|
static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints(
|
||||||
// slotStartAlignmentByteVal (page alignment)
|
// slotStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
// slotPadToNBytes (intensity value size)
|
// slotPadToNBytes: This is dynamically calculated based on the return mode.
|
||||||
sizeof(float),
|
sizeof(float),
|
||||||
// frameStartAlignmentByteVal (page alignment)
|
// frameStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
@@ -54,7 +54,7 @@ static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints(
|
|||||||
static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints(
|
static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints(
|
||||||
// slotStartAlignmentByteVal (page alignment)
|
// slotStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
// slotPadToNBytes (pointer size)
|
// slotPadToNBytes: This is dynamically calculated based on the return mode.
|
||||||
sizeof(float),
|
sizeof(float),
|
||||||
// frameStartAlignmentByteVal (page alignment)
|
// frameStartAlignmentByteVal (page alignment)
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
@@ -335,6 +335,7 @@ private:
|
|||||||
PcloudStimulusProducer& pcloudProducer;
|
PcloudStimulusProducer& pcloudProducer;
|
||||||
AsynchronousLoop frameAssemblyResult;
|
AsynchronousLoop frameAssemblyResult;
|
||||||
StimulusFrame& stimulusFrame;
|
StimulusFrame& stimulusFrame;
|
||||||
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ProduceFrameReq(
|
ProduceFrameReq(
|
||||||
@@ -392,8 +393,26 @@ public:
|
|||||||
|
|
||||||
context->frameAssemblyResult = loop;
|
context->frameAssemblyResult = loop;
|
||||||
|
|
||||||
|
// Check if intensity buffer is attached and acquire frame if so
|
||||||
|
if (pcloudProducer.intensityStimulusBuffer)
|
||||||
|
{
|
||||||
|
size_t intensityRingbuffIndex = pcloudProducer
|
||||||
|
.intensityStimulusBuffer->ringBuffer.getIndexToProduceInto();
|
||||||
|
|
||||||
|
StimulusFrame& intensityStimFrame = pcloudProducer
|
||||||
|
.intensityStimulusBuffer->ringBuffer.getDataAtSlot(
|
||||||
|
intensityRingbuffIndex);
|
||||||
|
|
||||||
|
intensityStimFrame.lock.writeAcquire();
|
||||||
|
context->intensityStimFrame = std::make_optional(
|
||||||
|
std::ref(intensityStimFrame));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
context->intensityStimFrame = std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
pcloudProducer.openClCollatingAndMeshingEngine.compactCollateAndMeshFrameReq(
|
pcloudProducer.openClCollatingAndMeshingEngine.compactCollateAndMeshFrameReq(
|
||||||
loop, stimulusFrame,
|
loop, stimulusFrame, context->intensityStimFrame,
|
||||||
{context, std::bind(
|
{context, std::bind(
|
||||||
&ProduceFrameReq::produceFrameReq3_compactCollateDone,
|
&ProduceFrameReq::produceFrameReq3_compactCollateDone,
|
||||||
context.get(), context,
|
context.get(), context,
|
||||||
@@ -404,6 +423,11 @@ public:
|
|||||||
[[maybe_unused]] std::shared_ptr<ProduceFrameReq> context,
|
[[maybe_unused]] std::shared_ptr<ProduceFrameReq> context,
|
||||||
bool success, StimulusFrame& /*stimulusFrame*/)
|
bool success, StimulusFrame& /*stimulusFrame*/)
|
||||||
{
|
{
|
||||||
|
// Release intensity frame if it was used
|
||||||
|
if (context->intensityStimFrame.has_value()) {
|
||||||
|
context->intensityStimFrame->get().lock.writeRelease();
|
||||||
|
}
|
||||||
|
|
||||||
SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
|
SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
|
||||||
if (!pcloudProducer.shouldContinue)
|
if (!pcloudProducer.shouldContinue)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user