OClCollMeshEngn: produce ambience into stimBuff frames directly

This commit is contained in:
2026-04-04 13:17:43 -04:00
parent 1c0f028de0
commit e8044a0d17
10 changed files with 101 additions and 333 deletions
@@ -35,10 +35,8 @@ OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(
computeDevice(nullptr),
clAssemblyBufferClBuffer(nullptr),
clCollationBufferClBuffer(nullptr),
clAverageIntensityBufferClBuffer(nullptr),
clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr),
clAverageIntensityBuffer(nullptr),
shouldAcceptRequests(false),
compactIsRunning(false),
collateIsRunning(false),
@@ -47,11 +45,8 @@ assemblyBufferPtr(nullptr),
assemblyBufferSize(0),
collationBufferPtr(nullptr),
collationBufferSize(0),
averageIntensityBufferPtr(nullptr),
averageIntensityBufferSize(0),
mappedAssemblyBuffer(nullptr),
mappedCollationBuffer(nullptr),
mappedAverageIntensityBuffer(nullptr),
frameAssemblyDesc(nullptr)
{
}
@@ -90,15 +85,11 @@ bool OpenClCollatingAndMeshingEngine::setup()
// Get StagingBuffer memory pointers from parent
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
assemblyBufferPtr = assemblyIov.iov_base;
assemblyBufferSize = assemblyIov.iov_len;
collationBufferPtr = collationIov.iov_base;
collationBufferSize = collationIov.iov_len;
averageIntensityBufferPtr = averageIntensityIov.iov_base;
averageIntensityBufferSize = averageIntensityIov.iov_len;
// Get FrameAssemblyDesc from assembly buffer
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
@@ -140,28 +131,13 @@ bool OpenClCollatingAndMeshingEngine::setup()
return false;
}
auto wip_clAverageIntensityBufferClBuffer = smoHooksPtr
->ComputeManager_createUseHostPtrBuffer(
averageIntensityBufferPtr, averageIntensityBufferSize,
CL_MEM_WRITE_ONLY);
if (!wip_clAverageIntensityBufferClBuffer)
{
std::cerr << __func__ << ": failed to create average intensity buffer"
<< std::endl;
return false;
}
// Cache cl_mem handles for the device we're using
cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clAverageIntensityBuffer = wip_clAverageIntensityBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer
|| !wip_clAverageIntensityBuffer)
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer)
{
std::cerr << __func__ << ": failed to get buffer handles for device"
<< std::endl;
@@ -186,10 +162,8 @@ bool OpenClCollatingAndMeshingEngine::setup()
computeDevice = wip_computeDevice;
clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer;
clCollationBufferClBuffer = wip_clCollationBufferClBuffer;
clAverageIntensityBufferClBuffer = wip_clAverageIntensityBufferClBuffer;
clAssemblyBuffer = wip_clAssemblyBuffer;
clCollationBuffer = wip_clCollationBuffer;
clAverageIntensityBuffer = wip_clAverageIntensityBuffer;
slotCompactorProgram = std::move(wip_slotCompactorProgram);
collateProgram = std::move(wip_collateProgram);
slotCompactorKernel = std::move(wip_slotCompactorKernel);
@@ -253,12 +227,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
{
if (clAverageIntensityBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
clAverageIntensityBufferClBuffer);
clAverageIntensityBufferClBuffer.reset();
}
if (clCollationBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
@@ -275,7 +243,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Reset cached cl_mem handles
clCollationBuffer = nullptr;
clAverageIntensityBuffer = nullptr;
clAssemblyBuffer = nullptr;
// Release kernels and programs (handled automatically by unique_ptr destructors)
@@ -301,8 +268,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
assemblyBufferSize = 0;
collationBufferPtr = nullptr;
collationBufferSize = 0;
averageIntensityBufferPtr = nullptr;
averageIntensityBufferSize = 0;
frameAssemblyDesc = nullptr;
}
@@ -409,15 +374,11 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
auto validateBuffers = [this]() {
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
|| collationIov.iov_len != collationBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
@@ -454,14 +415,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
unmapCollationBuffer();
if (!mapAverageIntensityBuffer(CL_MAP_WRITE))
{
std::cerr << __func__ << ": failed to map average intensity buffer"
<< std::endl;
return false;
}
unmapAverageIntensityBuffer();
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
@@ -486,6 +439,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
}
// Map/unmap ambience stim frame buffer (collate writes per-slot averages here)
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
// Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -708,21 +684,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
// Set average intensity buffer argument (arg 3)
/** EXPLANATION:
* We only pass the average intensity buffer argument to the collate kernel
* when ambienceStimFrame is present. This is because the collate kernel
* only needs the average intensity buffer if ambience processing is
* requested (i.e., the ambience stimulus buffer is attached). If no
* ambienceStimFrame is supplied, we skip passing the buffer to avoid
* unnecessary work.
*/
cl_mem averageIntensityClBuffer = nullptr;
if (ambienceStimFrame.has_value()) {
averageIntensityClBuffer = clAverageIntensityBuffer;
// Set ambience buffer argument (arg 3): acquired PcloudAmbience StimulusFrame
cl_mem ambienceClBufferArg = nullptr;
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
const size_t needBytes = static_cast<size_t>(nDgramsPerFrame)
* sizeof(float);
if (ambienceFrame.slotDesc.nBytes < needBytes)
{
std::cerr << __func__ << ": ambience stim frame slot too small: "
<< ambienceFrame.slotDesc.nBytes << " < " << needBytes
<< std::endl;
return false;
}
ambienceClBufferArg = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
}
err = clSetKernelArg(
collateKernel.get(), 3, sizeof(cl_mem), &averageIntensityClBuffer);
collateKernel.get(), 3, sizeof(cl_mem), &ambienceClBufferArg);
if (err != CL_SUCCESS)
{
@@ -804,8 +785,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing)
{
(void)ambienceStimFrame;
cl_map_flags mapFlags;
/** EXPLANATION:
* Technically we should only need to do this if we plan to read the
@@ -818,10 +797,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
unmapCollationBuffer();
}
if (mapAverageIntensityBuffer(mapFlags)) {
unmapAverageIntensityBuffer();
}
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
{
@@ -841,6 +816,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
}
}
// Sync GPU writes into ambience stim frame host backing store
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
}
clFlush(computeDevice->commandQueue);
// Stop only collate kernel
@@ -962,60 +956,6 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
return true;
}
/** EXPLANATION:
 * Maps the engine's average intensity buffer by forwarding to mapBuffer()
 * with the cached cl_mem handle (clAverageIntensityBuffer), the cached
 * buffer size (averageIntensityBufferSize) and the caller's map flags.
 * mappedAverageIntensityBuffer is handed to mapBuffer() as its last
 * argument; presumably it receives the mapped host pointer -- confirm
 * against mapBuffer().
 *
 * @param mapFlags OpenCL map flags forwarded unchanged to mapBuffer().
 * @return Whatever mapBuffer() reports for this mapping attempt.
 */
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
    cl_map_flags mapFlags)
{
    const bool mapSucceeded = mapBuffer(
        clAverageIntensityBuffer,
        averageIntensityBufferSize,
        mapFlags,
        mappedAverageIntensityBuffer);

    return mapSucceeded;
}
/** EXPLANATION:
 * Counterpart of mapAverageIntensityBuffer(): hands the cached cl_mem handle
 * and the currently stored mapping pointer to unmapBuffer(), then forgets
 * the mapping pointer.
 *
 * @return Always true; any outcome of unmapBuffer() is not inspected here.
 */
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
{
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
// Drop the stored pointer so it is not reused after the unmap call.
mappedAverageIntensityBuffer = nullptr;
return true;
}
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
uint32_t nSucceeded)
{
if (!ambienceStimFrame.has_value()) { return; }
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
if (!ambienceBuff) { return; }
const auto& ambienceCountComparator = ambienceBuff->ambienceCountComparator;
// Read average intensity values from averageIntensityBuffer
float* averageIntensityAverages = reinterpret_cast<float*>(
averageIntensityBufferPtr);
uint32_t ambiencePassbandCount = 0;
if (ambienceCountComparator.has_value())
{
// Count frames whose average intensity matches the configured comparator.
for (uint32_t i = 0; i < nSucceeded; ++i)
{
float avg = averageIntensityAverages[i];
if (ambienceCountComparator.value()(avg)) {
++ambiencePassbandCount;
}
}
}
// Write the ambience count to the ambienceStimFrame
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
using PcloudAmbienceStimVal = PcloudAmbienceStencil
::PcloudAmbienceStimulusValue;
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
ambienceValue[0] = ambiencePassbandCount;
}
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
{
@@ -1191,13 +1131,6 @@ public:
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
// Produce ambience frame if ambience buffer is attached
if (context->ambienceStimFrame.has_value())
{
engine.produceAmbienceStimulusFrame(
context->ambienceStimFrame, nSucceeded);
}
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode);
@@ -1218,46 +1151,10 @@ public:
}
}
}
(void)highIntensityCount;
#if 0
// Print all averages above thresholds from average intensity buffer
if (context->ambienceStimFrame.has_value())
{
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
const auto& ambienceCountComparator =
ambienceBuff->ambienceCountComparator;
uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold;
float* averageIntensityAverages = reinterpret_cast<float*>(
engine.averageIntensityBufferPtr);
// Count frames that met the postrin threshold
uint32_t framesMetThreshold = 0;
for (uint32_t i = 0; i < nSucceeded; ++i)
{
float avg = averageIntensityAverages[i];
if (ambienceCountComparator(avg)) {
++framesMetThreshold;
}
}
// Read the stimFrame value (ambience count)
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
using PcloudAmbienceStimVal = PcloudAmbienceStencil
::PcloudAmbienceStimulusValue;
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
PcloudAmbienceStimVal stimFrameValue = ambienceValue[0];
bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold);
std::cout << __func__ << ": frames met threshold=" << framesMetThreshold
<< ", stimFrame value=" << stimFrameValue
<< ", postrin threshold=" << postrinThreshold
<< ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no")
<< std::endl;
}
// Legacy debug: ambience floats live in ambienceStimFrame after collate
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)