PcloudStimProducer,OClCollMeshEngn: Produce ambience stim feature

The collation kernel now also produces the ambience stim feature
values into the ambience stimbuff frames.
This commit is contained in:
2025-11-23 07:20:55 -04:00
parent e689063a8c
commit ce690bc3f4
5 changed files with 214 additions and 26 deletions
+38
View File
@@ -25,6 +25,8 @@ __kernel void collate(
__global uchar* assembly,
__global float* collation,
__global float* intensityBuffer,
__global uint* ambienceBuffer,
uint ambienceHighVal,
uint slotStride,
uint nPointsPerSlot,
uint nDgramsPerFrame)
@@ -53,6 +55,9 @@ __kernel void collate(
uint intensityBaseOffset = slotIndex * nPointsPerSlot;
DBG_PRINTF("Running kernel: about to process points in slot.\n");
// Initialize ambience counter for this work item
uint ambienceCount = 0;
// Process based on data type using nested ifs (outer) with loops (inner)
if (dataType == 0)
{
@@ -92,6 +97,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + i] = intensity;
}
// Count high intensity values for ambience buffer
if (intensity >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
}
}
@@ -134,6 +143,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + i] = intensity;
}
// Count high intensity values for ambience buffer
if (intensity >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
}
}
@@ -177,6 +190,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
}
// Count high intensity values for ambience buffer
if (intensity1 >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
++pointIndex;
@@ -208,6 +225,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
}
// Count high intensity values for ambience buffer
if (intensity2 >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
++pointIndex;
}
@@ -252,6 +273,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + pointIndex] = intensity1;
}
// Count high intensity values for ambience buffer
if (intensity1 >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
++pointIndex;
@@ -283,6 +308,10 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + pointIndex] = intensity2;
}
// Count high intensity values for ambience buffer
if (intensity2 >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
++pointIndex;
@@ -314,9 +343,18 @@ __kernel void collate(
if (intensityBuffer != NULL) {
intensityBuffer[intensityBaseOffset + pointIndex] = intensity3;
}
// Count high intensity values for ambience buffer
if (intensity3 >= ambienceHighVal) {
++ambienceCount;
}
// Don't write intensity to collation buffer
++pointIndex;
}
}
// Unsupported data types are silently ignored
// Write ambience count for this work item (once at the end)
if (ambienceBuffer != NULL) {
ambienceBuffer[slotIndex] = ambienceCount;
}
}
@@ -2,6 +2,7 @@
#include <stdexcept>
#include <iostream>
#include <cstring>
#include <cstddef>
#include <vector>
#include <string>
#include <string_view>
@@ -165,7 +166,7 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Complete any running kernels
if (compactIsRunning) { compactKernelComplete(true); }
if (collateIsRunning) { collateKernelComplete(std::nullopt, true); }
if (collateIsRunning) { collateKernelComplete(std::nullopt, std::nullopt, true); }
// Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
@@ -326,6 +327,7 @@ bool OpenClCollatingAndMeshingEngine::startCompactKernel(
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback)
{
// Store the caller's callback
@@ -346,8 +348,12 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
};
// Setup args callable
auto setupArgs = [this, &assemblyBuff, intensityStimFrame]() {
return setupCollateDgramsArgs(assemblyBuff, intensityStimFrame);
auto setupArgs = [
this, &assemblyBuff, intensityStimFrame, ambienceStimFrame
]()
{
return setupCollateDgramsArgs(
assemblyBuff, intensityStimFrame, ambienceStimFrame);
};
/** EXPLANATION:
@@ -385,7 +391,8 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (!mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
if (!mapBuffer(
intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
{
std::cerr << __func__ << ": failed to map intensity buffer"
@@ -397,6 +404,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
// Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -552,7 +582,8 @@ bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame)
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
{
// Extract parameters for collateDgrams kernel
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
@@ -602,7 +633,14 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 3, sizeof(uint32_t), &slotStride);
// Set ambience buffer argument (arg 3)
cl_mem ambienceClBuffer = nullptr;
if (ambienceStimFrame.has_value())
{
ambienceClBuffer = ambienceStimFrame->get().clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
}
err = clSetKernelArg(collateKernel, 3, sizeof(cl_mem), &ambienceClBuffer);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
@@ -610,7 +648,12 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &nPointsPerSlot);
// Set ambienceHighVal argument (arg 4)
uint32_t ambienceHighVal = 0;
if (ambienceStimFrame.has_value() && parent.ambienceStimulusBuffer) {
ambienceHighVal = parent.ambienceStimulusBuffer->ambienceHighVal;
}
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &ambienceHighVal);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
@@ -618,10 +661,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false;
}
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nDgramsPerFrame);
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &slotStride);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
<< std::endl;
return false;
}
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nPointsPerSlot);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 6: " << err
<< std::endl;
return false;
}
err = clSetKernelArg(collateKernel, 7, sizeof(uint32_t), &nDgramsPerFrame);
if (err != CL_SUCCESS)
{
std::cerr << __func__ << ": failed to set kernel arg 7: " << err
<< std::endl;
return false;
}
@@ -670,6 +729,7 @@ void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing)
{
cl_map_flags mapFlags;
@@ -694,7 +754,8 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
if (intensityClBuffer)
{
void* mappedIntensityBuffer = nullptr;
if (mapBuffer(intensityClBuffer, intensityFrame.slotDesc.nBytes,
if (mapBuffer(
intensityClBuffer, intensityFrame.slotDesc.nBytes,
CL_MAP_READ, mappedIntensityBuffer))
{
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
@@ -702,6 +763,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
}
}
// Map/unmap ambience buffer if it exists
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
}
clFlush(computeDevice->commandQueue);
// Stop only collate kernel
@@ -833,6 +913,7 @@ private:
AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
public:
CompactCollateAndMeshFrameReq(
@@ -840,13 +921,15 @@ public:
AsynchronousLoop& asyncLoop,
StimulusFrame& stimulusFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame_,
const std::shared_ptr<ComponentThread>& caller,
Callback<compactCollateAndMeshFrameReqCbFn> cb)
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
caller, cb),
engine(engine_),
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
intensityStimFrame(intensityStimFrame_)
intensityStimFrame(intensityStimFrame_),
ambienceStimFrame(ambienceStimFrame_)
{}
public:
@@ -941,7 +1024,7 @@ public:
bool success = engine.startCollateKernel(
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
context->intensityStimFrame,
context->intensityStimFrame, context->ambienceStimFrame,
std::bind(
&CompactCollateAndMeshFrameReq
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
@@ -950,7 +1033,9 @@ public:
if (!success)
{
engine.collateKernelComplete(context->intensityStimFrame);
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
callOriginalCallback(false);
return;
}
@@ -979,7 +1064,9 @@ public:
* Therefore it's finalize()'s responsibility to ensure that it properly
* completes/cleans up any in-flight operations.
*/
engine.collateKernelComplete(context->intensityStimFrame);
engine.collateKernelComplete(
context->intensityStimFrame, context->ambienceStimFrame);
// Record collate kernel end time
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
@@ -1015,12 +1102,29 @@ public:
}
}
std::cout << __func__ << ": ringBufferIndex="
<< context->intensityStimFrame->get().ringBufferIndex
// Sum up ambience counts from ambience buffer
uint32_t ambienceCountSum = 0;
if (context->ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
uint32_t* ambienceCounts = reinterpret_cast<uint32_t*>(ambienceFrame.slotDesc.vaddr);
for (uint32_t i = 0; i < nSucceeded; ++i)
{
ambienceCountSum += ambienceCounts[i];
}
}
std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", ambienceRingBufferIndex="
<< (context->ambienceStimFrame.has_value() ?
context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX)
<< ", pointsPerDgram=" << pointsPerDgram
<< ", nSucceeded=" << nSucceeded
<< ", totalPoints=" << totalPoints
<< ", highIntensityCount=" << highIntensityCount << std::endl;
<< ", highIntensityCount=" << highIntensityCount
<< ", ambienceCountSum=" << ambienceCountSum << std::endl;
callOriginalCallback(success);
}
@@ -1029,6 +1133,7 @@ public:
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback)
{
{
@@ -1042,7 +1147,7 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
auto caller = smoHooksPtr->ComponentThread_getSelf();
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
*this, asyncLoop, stimulusFrame, intensityStimFrame,
*this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame,
caller,
std::move(callback));
@@ -50,6 +50,7 @@ public:
void compactCollateAndMeshFrameReq(
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback);
private:
@@ -63,11 +64,13 @@ private:
bool startCollateKernel(
StagingBuffer& assemblyBuff, StagingBuffer& collationBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
collateKernelCbFn callback);
void compactKernelComplete(bool isFinalizing=false);
void collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing=false);
bool stop();
@@ -138,7 +141,8 @@ private:
StagingBuffer& assemblyBuff, uint32_t nSucceeded);
bool setupCollateDgramsArgs(
StagingBuffer& assemblyBuff,
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame);
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame);
// Generic buffer mapping/unmapping for zero-copy synchronization
bool mapBuffer(
@@ -2,6 +2,7 @@
#define _LIVOX_GEN1_PCLOUD_AMBIENCE_STIMULUS_BUFFER_H
#include <memory>
#include <cstdint>
#include <user/stimulusBuffer.h>
#include <user/stagingBuffer.h>
@@ -25,11 +26,13 @@ public:
const StagingBuffer::IOEngineConstraints& inputEngineConstraints,
const StagingBuffer::IOEngineConstraints& outputEngineConstraints,
const SmoCallbacks& callbacks,
cl_mem_flags flags)
cl_mem_flags flags,
uint32_t ambienceHighVal_)
: StimulusBuffer(
parent, deviceAttachmentSpec, histbuffMs,
inputEngineConstraints, outputEngineConstraints,
callbacks, flags)
callbacks, flags),
ambienceHighVal(ambienceHighVal_)
{}
~PcloudAmbienceStimulusBuffer() = default;
@@ -39,6 +42,9 @@ public:
PcloudAmbienceStimulusBuffer& operator=(const PcloudAmbienceStimulusBuffer&) = delete;
PcloudAmbienceStimulusBuffer(PcloudAmbienceStimulusBuffer&&) = default;
PcloudAmbienceStimulusBuffer& operator=(PcloudAmbienceStimulusBuffer&&) = default;
public:
uint32_t ambienceHighVal;
};
} // namespace stim_buff
@@ -56,7 +56,7 @@ static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints(
// slotStartAlignmentByteVal (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// slotPadToNBytes: This is dynamically calculated based on the return mode.
sizeof(float),
sizeof(uint32_t),
// frameStartAlignmentByteVal (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// framePadToNBytes (pointer size)
@@ -295,18 +295,29 @@ std::cout << __func__ << ": $$$$$$$ Created PcloudIntensityStimulusBuffer" << st
}
else if (qualeIfaceApi == "pcloudAmbience")
{
// Parse ambienceHighVal from stimBuffApiParams
const std::vector<std::string> ambienceHighValParamNames = {
"high-value",
"high-val"
};
int ambienceHighValInt = device::DeviceAttachmentSpec
::parseOptionalParamAsIntWithSynonyms(
deviceAttachmentSpec->stimBuffApiParams,
ambienceHighValParamNames, 116);
uint32_t ambienceHighVal = static_cast<uint32_t>(ambienceHighValInt);
/* Calculate slotStrideNBytes:
* nDgramsPerStagingBufferFrame * sizeof(float)
* nDgramsPerStagingBufferFrame * sizeof(uint32_t)
*/
size_t slotStrideNBytes = this->nDgramsPerStagingBufferFrame
* sizeof(float);
* sizeof(uint32_t);
// Reuse openClAmbienceInputConstraints, only modify slotPadToNBytes
openClAmbienceInputConstraints.slotPadToNBytes = slotStrideNBytes;
auto ambienceBuffer = std::make_shared<PcloudAmbienceStimulusBuffer>(
*this, deviceAttachmentSpec, histbuffMs,
openClAmbienceInputConstraints, openClAmbienceInputConstraints,
*smoHooksPtr, CL_MEM_READ_WRITE);
*smoHooksPtr, CL_MEM_READ_WRITE, ambienceHighVal);
std::cout << __func__ << ": $$$$$$$ Created PcloudAmbienceStimulusBuffer" << std::endl;
this->stop();
@@ -337,6 +348,7 @@ private:
AsynchronousLoop frameAssemblyResult;
StimulusFrame& stimulusFrame;
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
public:
ProduceFrameReq(
@@ -412,8 +424,27 @@ public:
context->intensityStimFrame = std::nullopt;
}
// Check if ambience buffer is attached and acquire frame if so
if (pcloudProducer.ambienceStimulusBuffer)
{
size_t ambienceRingbuffIndex = pcloudProducer
.ambienceStimulusBuffer->ringBuffer.getIndexToProduceInto();
StimulusFrame& ambienceStimFrame = pcloudProducer
.ambienceStimulusBuffer->ringBuffer.getDataAtSlot(
ambienceRingbuffIndex);
ambienceStimFrame.lock.writeAcquire();
context->ambienceStimFrame = std::make_optional(
std::ref(ambienceStimFrame));
}
else {
context->ambienceStimFrame = std::nullopt;
}
pcloudProducer.openClCollatingAndMeshingEngine.compactCollateAndMeshFrameReq(
loop, stimulusFrame, context->intensityStimFrame,
loop, stimulusFrame,
context->intensityStimFrame, context->ambienceStimFrame,
{context, std::bind(
&ProduceFrameReq::produceFrameReq3_compactCollateDone,
context.get(), context,
@@ -428,6 +459,10 @@ public:
if (context->intensityStimFrame.has_value()) {
context->intensityStimFrame->get().lock.writeRelease();
}
// Release ambience frame if it was used
if (context->ambienceStimFrame.has_value()) {
context->ambienceStimFrame->get().lock.writeRelease();
}
SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
if (!pcloudProducer.shouldContinue)