PcloudStimProducer,OClCollMeshEngn: Produce ambience stim feature

The collation kernel now also produces the ambience stim feature
values, writing them into the ambience stimbuff frames.
This commit is contained in:
2025-11-23 07:20:55 -04:00
parent e689063a8c
commit ce690bc3f4
5 changed files with 214 additions and 26 deletions
@@ -2,6 +2,7 @@
#include <stdexcept>
#include <iostream>
#include <cstring>
#include <cstddef>
#include <vector>
#include <string>
#include <string_view>
@@ -165,7 +166,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Complete any running kernels
if (compactIsRunning) { compactKernelComplete(true); }
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
if (collateIsRunning) { collateKernelComplete(std::nullopt, std::nullopt, true); }
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
@@ -326,6 +327,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback)
{
// Store the caller's callback
@@ -346,8 +348,12 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
};
// Setup args callable
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
auto setupArgs = [
this, &assemblyBuff, intensityStimFrame, ambienceStimFrame
]()
{
return setupCollateDgramsArgs(
assemblyBuff, intensityStimFrame, ambienceStimFrame);
};
/** EXPLANATION:
@@ -385,7 +391,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
if (!mapBuffer(
intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
{
std::cerr << __func__ << ": failed to map intensity buffer"
@@ -397,6 +404,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
// Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -552,7 +582,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
{
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
@@ -602,7 +633,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
// Set ambience buffer argument (arg 3)
cl_mem ambienceClBuffer = nullptr;
if (ambienceStimFrame.has_value())
{
ambienceClBuffer = ambienceStimFrame->get().clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
}
err = clSetKernelArg(collateKernel, 3, sizeof(cl_mem), &ambienceClBuffer);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
@@ -610,7 +648,12 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
// Set ambienceHighVal argument (arg 4)
uint32_t ambienceHighVal = 0;
if (ambienceStimFrame.has_value() && parent.ambienceStimulusBuffer) {
ambienceHighVal = parent.ambienceStimulusBuffer->ambienceHighVal;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &ambienceHighVal);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
@@ -618,10 +661,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &slotStride);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
<< std::endl;
return false;
}
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nPointsPerSlot);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 6: " << err
<< std::endl;
return false;
}
err = clSetKernelArg(collateKernel, 7, sizeof(uint32_t), &nDgramsPerFrame);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 7: " << err
<< std::endl;
return false;
}
@@ -670,6 +729,7 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing)
{
cl_map_flags mapFlags;
@@ -694,7 +754,8 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
if (mapBuffer(
intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_READ, mappedIntensityBuffer))
{
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
@@ -702,6 +763,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
}
clFlush(computeDevice->commandQueue);
// Stop only collate kernel
@@ -833,6 +913,7 @@ private:
AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
public:
CompactCollateAndMeshFrameReq(
@@ -840,13 +921,15 @@ public:
AsynchronousLoop& asyncLoop,
StimulusFrame& stimulusFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame_,
const std::shared_ptr<ComponentThread>& caller,
Callback<compactCollateAndMeshFrameReqCbFn> cb)
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
caller, cb),
engine(engine_),
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
intensityStimFrame(intensityStimFrame_)
intensityStimFrame(intensityStimFrame_),
ambienceStimFrame(ambienceStimFrame_)
{}
public:
@@ -941,7 +1024,7 @@ public:
bool success = engine.startCollateKernel(
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
context->intensityStimFrame,
context->intensityStimFrame, context->ambienceStimFrame,
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
@@ -950,7 +1033,9 @@ public:
if (!success)
{
engine.collateKernelComplete(context->intensityStimFrame);
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
callOriginalCallback(false);
return;
}
@@ -979,7 +1064,9 @@ public:
* Therefore it's finalize()'s responsibility to ensure that it properly
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(context->intensityStimFrame);
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
@@ -1015,12 +1102,29 @@ public:
}
}
std::cout << __func__ << ": ringBufferIndex="
<< context->intensityStimFrame->get().ringBufferIndex
// Sum up ambience counts from ambience buffer
uint32_t ambienceCountSum = 0;
if (context->ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
uint32_t* ambienceCounts = reinterpret_cast<uint32_t*>(ambienceFrame.slotDesc.vaddr);
for (uint32_t i = 0; i < nSucceeded; ++i)
{
ambienceCountSum += ambienceCounts[i];
}
}
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", ambienceRingBufferIndex="
<< (context->ambienceStimFrame.has_value() ?
context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount << std::endl;
<< ", highIntensityCount=" << highIntensityCount
<< ", ambienceCountSum=" << ambienceCountSum << std::endl;
callOriginalCallback(success);
}
@@ -1029,6 +1133,7 @@ public:
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
@@ -1042,7 +1147,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
auto caller = smoHooksPtr->ComponentThread_getSelf();
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
*this, asyncLoop, stimulusFrame, intensityStimFrame,
*this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame,
caller,
std::move(callback));