OClCollMeshEngn: Produce ambience count; set postrin threshold

We modify the semantics of the ambience stim feature.
It now represents the number of frames whose average intensity
is at or below (<=) the ambienceLowVal.

We can now implement the postrin as the event wherein the number
of frames whose average intensity is <= ambienceLowVal exceeds
the postrin-interest-threshold.
This commit is contained in:
2025-11-28 00:12:18 -04:00
parent 5b19a70c75
commit 280b6f7d1c
7 changed files with 300 additions and 163 deletions
@@ -37,8 +37,10 @@ slotCompactorProgram(nullptr), collateProgram(nullptr),
slotCompactorKernel(nullptr), collateKernel(nullptr),
clAssemblyBufferClBuffer(nullptr),
clCollationBufferClBuffer(nullptr),
clAverageIntensityBufferClBuffer(nullptr),
clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr),
clAverageIntensityBuffer(nullptr),
shouldAcceptRequests(false),
compactIsRunning(false),
collateIsRunning(false),
@@ -47,8 +49,11 @@ assemblyBufferPtr(nullptr),
assemblyBufferSize(0),
collationBufferPtr(nullptr),
collationBufferSize(0),
averageIntensityBufferPtr(nullptr),
averageIntensityBufferSize(0),
mappedAssemblyBuffer(nullptr),
mappedCollationBuffer(nullptr),
mappedAverageIntensityBuffer(nullptr),
frameAssemblyDesc(nullptr)
{
}
@@ -87,11 +92,15 @@ bool OpenClCollatingAndMeshingEngine::setup()
// Get StagingBuffer memory pointers from parent
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
assemblyBufferPtr = assemblyIov.iov_base;
assemblyBufferSize = assemblyIov.iov_len;
collationBufferPtr = collationIov.iov_base;
collationBufferSize = collationIov.iov_len;
averageIntensityBufferPtr = averageIntensityIov.iov_base;
averageIntensityBufferSize = averageIntensityIov.iov_len;
// Get FrameAssemblyDesc from assembly buffer
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
@@ -133,13 +142,27 @@ bool OpenClCollatingAndMeshingEngine::setup()
goto cleanup;
}
clAverageIntensityBufferClBuffer = smoHooksPtr
->ComputeManager_createUseHostPtrBuffer(
averageIntensityBufferPtr, averageIntensityBufferSize,
CL_MEM_WRITE_ONLY);
if (!clAverageIntensityBufferClBuffer)
{
std::cerr << __func__ << ": failed to create average intensity buffer"
<< std::endl;
goto cleanup;
}
// Cache cl_mem handles for the device we're using
clAssemblyBuffer = clAssemblyBufferClBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
clCollationBuffer = clCollationBufferClBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
clAverageIntensityBuffer = clAverageIntensityBufferClBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (!clAssemblyBuffer || !clCollationBuffer)
if (!clAssemblyBuffer || !clCollationBuffer || !clAverageIntensityBuffer)
{
std::cerr << __func__ << ": failed to get buffer handles for device"
<< std::endl;
@@ -213,6 +236,12 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
{
if (clAverageIntensityBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
clAverageIntensityBufferClBuffer);
clAverageIntensityBufferClBuffer.reset();
}
if (clCollationBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
@@ -229,6 +258,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Reset cached cl_mem handles
clCollationBuffer = nullptr;
clAverageIntensityBuffer = nullptr;
clAssemblyBuffer = nullptr;
// Release kernels
@@ -272,6 +302,8 @@ void OpenClCollatingAndMeshingEngine::finalize()
assemblyBufferSize = 0;
collationBufferPtr = nullptr;
collationBufferSize = 0;
averageIntensityBufferPtr = nullptr;
averageIntensityBufferSize = 0;
frameAssemblyDesc = nullptr;
}
@@ -367,7 +399,6 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
}
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback)
@@ -376,13 +407,18 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
collateKernelCb = std::move(callback);
// Validate buffers callable
auto validateBuffers = [this, &assemblyBuff, &collationBuff]() {
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
struct iovec collationIov = collationBuff.getClEngineIovec();
auto validateBuffers = [this]() {
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize)
|| collationIov.iov_len != collationBufferSize
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
{
throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed");
@@ -390,12 +426,9 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
};
// Setup args callable
auto setupArgs = [
this, &assemblyBuff, intensityStimFrame, ambienceStimFrame
]()
auto setupArgs = [this, intensityStimFrame, ambienceStimFrame]()
{
return setupCollateDgramsArgs(
assemblyBuff, intensityStimFrame, ambienceStimFrame);
return setupCollateDgramsArgs(intensityStimFrame, ambienceStimFrame);
};
/** EXPLANATION:
@@ -422,6 +455,14 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
unmapCollationBuffer();
if (!mapAverageIntensityBuffer(CL_MAP_WRITE))
{
std::cerr << __func__ << ": failed to map average intensity buffer"
<< std::endl;
return false;
}
unmapAverageIntensityBuffer();
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
@@ -446,29 +487,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
// Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -623,12 +641,12 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
}
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
{
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
uint32_t slotStride = static_cast<uint32_t>(
parent.assemblyBuffer.slotStrideNBytes);
// Calculate nPointsPerSlot from device return mode
if (!parent.device)
@@ -675,14 +693,22 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
// Set ambience buffer argument (arg 3)
cl_mem ambienceClBuffer = nullptr;
if (ambienceStimFrame.has_value())
{
ambienceClBuffer = ambienceStimFrame->get().clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
// Set average intensity buffer argument (arg 3)
/** EXPLANATION:
* We only pass the average intensity buffer argument to the collate kernel
* when ambienceStimFrame is present. This is because the collate kernel
* only needs the average intensity buffer if ambience processing is
* requested (i.e., the ambience stimulus buffer is attached). If no
* ambienceStimFrame is supplied, we skip passing the buffer to avoid
* unnecessary work.
*/
cl_mem averageIntensityClBuffer = nullptr;
if (ambienceStimFrame.has_value()) {
averageIntensityClBuffer = clAverageIntensityBuffer;
}
err = clSetKernelArg(collateKernel, 3, sizeof(cl_mem), &ambienceClBuffer);
err = clSetKernelArg(
collateKernel, 3, sizeof(cl_mem), &averageIntensityClBuffer);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
@@ -690,17 +716,7 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
// Set ambienceHighVal argument (arg 4)
uint32_t ambienceHighVal = 0;
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff = nullptr;
if (ambienceStimFrame.has_value()
&& (ambienceBuff = parent.ambienceStimulusBuffer.load(
std::memory_order_acquire)))
{
ambienceHighVal = ambienceBuff->ambienceHighVal;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &ambienceHighVal);
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &slotStride);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
@@ -708,15 +724,15 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &slotStride);
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nPointsPerSlot);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
<< std::endl;
return false;
}
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nPointsPerSlot);
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nDgramsPerFrame);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 6: " << err
@@ -724,14 +740,6 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 7, sizeof(uint32_t), &nDgramsPerFrame);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 7: " << err
<< std::endl;
return false;
}
return true;
}
@@ -779,6 +787,8 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing)
{
(void)ambienceStimFrame;
cl_map_flags mapFlags;
/** EXPLANATION:
* Technically we should only need to do this if we plan to read the
@@ -791,6 +801,10 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
unmapCollationBuffer();
}
if (mapAverageIntensityBuffer(mapFlags)) {
unmapAverageIntensityBuffer();
}
// Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value())
{
@@ -810,25 +824,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
}
clFlush(computeDevice->commandQueue);
// Stop only collate kernel
@@ -952,6 +947,56 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
return true;
}
/** EXPLANATION:
 * Maps the average intensity buffer for host access by forwarding to
 * mapBuffer() with the cached cl_mem handle (clAverageIntensityBuffer) and
 * the cached buffer size (averageIntensityBufferSize).
 *
 * mappedAverageIntensityBuffer is handed to mapBuffer() as its last
 * argument; presumably it receives the mapped host pointer (confirm against
 * mapBuffer()'s declaration).
 *
 * @param mapFlags Map flags forwarded unchanged to mapBuffer().
 * @return The result reported by mapBuffer().
 */
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
	cl_map_flags mapFlags)
{
	const bool isMapped = mapBuffer(
		clAverageIntensityBuffer,
		averageIntensityBufferSize,
		mapFlags,
		mappedAverageIntensityBuffer);

	return isMapped;
}
// Releases the host mapping of the average intensity buffer.
// Passes the cached cl_mem handle and the currently stored mapped pointer to
// unmapBuffer(), then clears the stored mapped pointer.
// Always returns true; no result of unmapBuffer() is inspected here.
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
{
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
// Clear the stored pointer only after it has been handed to unmapBuffer().
mappedAverageIntensityBuffer = nullptr;
return true;
}
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
uint32_t nSucceeded)
{
if (!ambienceStimFrame.has_value()) { return; }
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
if (!ambienceBuff) { return; }
uint32_t lowVal = ambienceBuff->ambienceIntensityLowVal;
// Read average intensity values from averageIntensityBuffer
float* averageIntensityAverages = reinterpret_cast<float*>(
averageIntensityBufferPtr);
// Count frames whose average intensity is <= lowVal (postrin only)
uint32_t ambienceCount = 0;
for (uint32_t i = 0; i < nSucceeded; ++i)
{
float avg = averageIntensityAverages[i];
if (avg <= static_cast<float>(lowVal))
{
++ambienceCount;
}
}
// Write the ambience count to the ambienceStimFrame
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
uint32_t* ambienceValue = reinterpret_cast<uint32_t*>(
ambienceFrame.slotDesc.vaddr);
ambienceValue[0] = ambienceCount;
}
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
{
@@ -1070,7 +1115,6 @@ public:
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
bool success = engine.startCollateKernel(
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
context->intensityStimFrame, context->ambienceStimFrame,
std::bind(
&CompactCollateAndMeshFrameReq
@@ -1126,11 +1170,18 @@ public:
return;
}
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
// Produce ambience frame if ambience buffer is attached
if (context->ambienceStimFrame.has_value())
{
engine.produceAmbienceStimulusFrame(
context->ambienceStimFrame, nSucceeded);
}
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode);
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
size_t totalPoints = nSucceeded * pointsPerDgram;
// Count points with intensity greater than 116
@@ -1149,16 +1200,40 @@ public:
}
}
// Sum up ambience counts from ambience buffer
uint32_t ambienceCountSum = 0;
// Print all averages above thresholds from average intensity buffer
if (context->ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
uint32_t* ambienceCounts = reinterpret_cast<uint32_t*>(ambienceFrame.slotDesc.vaddr);
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
uint32_t lowVal = ambienceBuff->ambienceIntensityLowVal;
uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold;
float* averageIntensityAverages = reinterpret_cast<float*>(
engine.averageIntensityBufferPtr);
// Count frames that met the postrin threshold
uint32_t framesMetThreshold = 0;
for (uint32_t i = 0; i < nSucceeded; ++i)
{
ambienceCountSum += ambienceCounts[i];
float avg = averageIntensityAverages[i];
if (avg <= static_cast<float>(lowVal))
{
++framesMetThreshold;
}
}
// Read the stimFrame value (ambience count)
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
uint32_t* ambienceValue = reinterpret_cast<uint32_t*>(
ambienceFrame.slotDesc.vaddr);
uint32_t stimFrameValue = ambienceValue[0];
bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold);
std::cout << __func__ << ": frames met threshold=" << framesMetThreshold
<< ", stimFrame value=" << stimFrameValue
<< ", postrin threshold=" << postrinThreshold
<< ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no")
<< std::endl;
}
std::cout << __func__ << ": intensityRingBufferIndex="
@@ -1170,8 +1245,7 @@ public:
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount
<< ", ambienceCountSum=" << ambienceCountSum << std::endl;
<< ", highIntensityCount=" << highIntensityCount << std::endl;
callOriginalCallback(success);
}