PcloudStimProducer,OClCollMeshEngn: Produce ambience stim feature
The collation kernel now also produces the ambience stim feature values and writes them into the ambience stimbuff frames.
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@@ -165,7 +166,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
||||
|
||||
// Complete any running kernels
|
||||
if (compactIsRunning) { compactKernelComplete(true); }
|
||||
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
|
||||
if (collateIsRunning) { collateKernelComplete(std::nullopt, std::nullopt, true); }
|
||||
|
||||
// Release OpenCL buffers via smo hooks
|
||||
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
||||
@@ -326,6 +327,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
||||
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
||||
collateKernelCbFn callback)
|
||||
{
|
||||
// Store the caller's callback
|
||||
@@ -346,8 +348,12 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
};
|
||||
|
||||
// Setup args callable
|
||||
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
|
||||
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
|
||||
auto setupArgs = [
|
||||
this, &assemblyBuff, intensityStimFrame, ambienceStimFrame
|
||||
]()
|
||||
{
|
||||
return setupCollateDgramsArgs(
|
||||
assemblyBuff, intensityStimFrame, ambienceStimFrame);
|
||||
};
|
||||
|
||||
/** EXPLANATION:
|
||||
@@ -385,7 +391,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
if (intensityClBuffer)
|
||||
{
|
||||
void* mappedIntensityBuffer = nullptr;
|
||||
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
if (!mapBuffer(
|
||||
intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
|
||||
{
|
||||
std::cerr << __func__ << ": failed to map intensity buffer"
|
||||
@@ -397,6 +404,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
}
|
||||
}
|
||||
|
||||
// Map/unmap ambience buffer if it exists
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (ambienceClBuffer)
|
||||
{
|
||||
void* mappedAmbienceBuffer = nullptr;
|
||||
if (!mapBuffer(
|
||||
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
|
||||
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
|
||||
{
|
||||
std::cerr << __func__ << ": failed to map ambience buffer"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate global work size (just num slots in the frame)
|
||||
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||
|
||||
@@ -552,7 +582,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
||||
|
||||
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
StagingBuffer& assemblyBuff,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
|
||||
{
|
||||
// Extract parameters for collateDgrams kernel
|
||||
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
||||
@@ -602,7 +633,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
|
||||
// Set ambience buffer argument (arg 3)
|
||||
cl_mem ambienceClBuffer = nullptr;
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
ambienceClBuffer = ambienceStimFrame->get().clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
}
|
||||
err = clSetKernelArg(collateKernel, 3, sizeof(cl_mem), &ambienceClBuffer);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
||||
@@ -610,7 +648,12 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
|
||||
// Set ambienceHighVal argument (arg 4)
|
||||
uint32_t ambienceHighVal = 0;
|
||||
if (ambienceStimFrame.has_value() && parent.ambienceStimulusBuffer) {
|
||||
ambienceHighVal = parent.ambienceStimulusBuffer->ambienceHighVal;
|
||||
}
|
||||
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &ambienceHighVal);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
||||
@@ -618,10 +661,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &slotStride);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
||||
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nPointsPerSlot);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 6: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(collateKernel, 7, sizeof(uint32_t), &nDgramsPerFrame);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to set kernel arg 7: " << err
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
@@ -670,6 +729,7 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
||||
|
||||
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
||||
bool isFinalizing)
|
||||
{
|
||||
cl_map_flags mapFlags;
|
||||
@@ -694,7 +754,8 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
if (intensityClBuffer)
|
||||
{
|
||||
void* mappedIntensityBuffer = nullptr;
|
||||
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
if (mapBuffer(
|
||||
intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
||||
CL_MAP_READ, mappedIntensityBuffer))
|
||||
{
|
||||
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
||||
@@ -702,6 +763,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
}
|
||||
}
|
||||
|
||||
// Map/unmap ambience buffer if it exists
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (ambienceClBuffer)
|
||||
{
|
||||
void* mappedAmbienceBuffer = nullptr;
|
||||
if (mapBuffer(
|
||||
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
|
||||
CL_MAP_READ, mappedAmbienceBuffer))
|
||||
{
|
||||
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clFlush(computeDevice->commandQueue);
|
||||
|
||||
// Stop only collate kernel
|
||||
@@ -833,6 +913,7 @@ private:
|
||||
AsynchronousLoop frameAssemblyResult;
|
||||
StimulusFrame& stimulusFrame;
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
|
||||
|
||||
public:
|
||||
CompactCollateAndMeshFrameReq(
|
||||
@@ -840,13 +921,15 @@ public:
|
||||
AsynchronousLoop& asyncLoop,
|
||||
StimulusFrame& stimulusFrame_,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame_,
|
||||
const std::shared_ptr<ComponentThread>& caller,
|
||||
Callback<compactCollateAndMeshFrameReqCbFn> cb)
|
||||
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
|
||||
caller, cb),
|
||||
engine(engine_),
|
||||
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
|
||||
intensityStimFrame(intensityStimFrame_)
|
||||
intensityStimFrame(intensityStimFrame_),
|
||||
ambienceStimFrame(ambienceStimFrame_)
|
||||
{}
|
||||
|
||||
public:
|
||||
@@ -941,7 +1024,7 @@ public:
|
||||
|
||||
bool success = engine.startCollateKernel(
|
||||
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
||||
context->intensityStimFrame,
|
||||
context->intensityStimFrame, context->ambienceStimFrame,
|
||||
std::bind(
|
||||
&CompactCollateAndMeshFrameReq
|
||||
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
|
||||
@@ -950,7 +1033,9 @@ public:
|
||||
|
||||
if (!success)
|
||||
{
|
||||
engine.collateKernelComplete(context->intensityStimFrame);
|
||||
engine.collateKernelComplete(
|
||||
context->intensityStimFrame, context->ambienceStimFrame);
|
||||
|
||||
callOriginalCallback(false);
|
||||
return;
|
||||
}
|
||||
@@ -979,7 +1064,9 @@ public:
|
||||
* Therefore it's finalize()'s responsibility to ensure that it properly
|
||||
* completes/cleans up any in-flight operations.
|
||||
*/
|
||||
engine.collateKernelComplete(context->intensityStimFrame);
|
||||
engine.collateKernelComplete(
|
||||
context->intensityStimFrame, context->ambienceStimFrame);
|
||||
|
||||
// Record collate kernel end time
|
||||
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
@@ -1015,12 +1102,29 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << __func__ << ": ringBufferIndex="
|
||||
<< context->intensityStimFrame->get().ringBufferIndex
|
||||
// Sum up ambience counts from ambience buffer
|
||||
uint32_t ambienceCountSum = 0;
|
||||
if (context->ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
|
||||
uint32_t* ambienceCounts = reinterpret_cast<uint32_t*>(ambienceFrame.slotDesc.vaddr);
|
||||
for (uint32_t i = 0; i < nSucceeded; ++i)
|
||||
{
|
||||
ambienceCountSum += ambienceCounts[i];
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << __func__ << ": intensityRingBufferIndex="
|
||||
<< (context->intensityStimFrame.has_value() ?
|
||||
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
|
||||
<< ", ambienceRingBufferIndex="
|
||||
<< (context->ambienceStimFrame.has_value() ?
|
||||
context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX)
|
||||
<< ", pointsPerDgram=" << pointsPerDgram
|
||||
<< ", nSucceeded=" << nSucceeded
|
||||
<< ", totalPoints=" << totalPoints
|
||||
<< ", highIntensityCount=" << highIntensityCount << std::endl;
|
||||
<< ", highIntensityCount=" << highIntensityCount
|
||||
<< ", ambienceCountSum=" << ambienceCountSum << std::endl;
|
||||
|
||||
callOriginalCallback(success);
|
||||
}
|
||||
@@ -1029,6 +1133,7 @@ public:
|
||||
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
||||
Callback<compactCollateAndMeshFrameReqCbFn> callback)
|
||||
{
|
||||
{
|
||||
@@ -1042,7 +1147,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||
|
||||
auto caller = smoHooksPtr->ComponentThread_getSelf();
|
||||
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
|
||||
*this, asyncLoop, stimulusFrame, intensityStimFrame,
|
||||
*this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame,
|
||||
caller,
|
||||
std::move(callback));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user