OClCollMeshEngn: produce ambience into stimBuff frames directly

This commit is contained in:
2026-04-04 13:17:43 -04:00
parent 1c0f028de0
commit e8044a0d17
10 changed files with 101 additions and 333 deletions
+8 -4
View File
@@ -77,14 +77,18 @@ raising new intrins.
- Stimbuffs must respect this limit and wait for stencil returns before - Stimbuffs must respect this limit and wait for stencil returns before
allocating new ones allocating new ones
**Example:** **Example (generic device):**
```
+idev|my-device|someQualeApi(n-stencils=4)|someStimBuffApi()|livoxProto1()|SERIAL
```
The `pcloudAmbience` Livox Gen1 path does **not** use the `n-stencils` parameter; ambience data is delivered as a dense float vector in the stimulus frame buffer, not via a separate stencil allocation list.
**Deprecated example (do not use for Livox Gen1 ambience):**
``` ```
+idev|my-device|pcloudAmbience(n-stencils=4)|livoxGen1-pcloud()|livoxProto1()|3JEDK380010Z39 +idev|my-device|pcloudAmbience(n-stencils=4)|livoxGen1-pcloud()|livoxProto1()|3JEDK380010Z39
``` ```
This example allows the stimbuff to allocate up to 4 stencils
simultaneously.
## Notes ## Notes
The stencil registration mechanism discussed above is not currently The stencil registration mechanism discussed above is not currently
+4 -4
View File
@@ -33,7 +33,7 @@ Each stim-buff-api is designed to work with specific stim-iface libraries that u
### 2. Point Cloud Ambience Data Device (Interoceptor) ### 2. Point Cloud Ambience Data Device (Interoceptor)
**Purpose**: Provides ambience data from the LiDAR point cloud, computing average intensity per frame and counting frames that qualify for intrinsic stimuli. **Purpose**: Provides ambience data from the LiDAR point cloud as a **vector of per-dagram average intensities** (one `float` per UDP datagram slot in the staging frame, length `n-dgrams-per-frame`). The OpenCL collate kernel writes these values directly into the acquired ambience `StimulusFrame` buffer.
**Syntax**: **Syntax**:
``` ```
@@ -41,10 +41,10 @@ Each stim-buff-api is designed to work with specific stim-iface libraries that u
``` ```
**Stim-Buff-API**: `livoxGen1-pcloud` **Stim-Buff-API**: `livoxGen1-pcloud`
**Quale-Iface-API**: `pcloudAmbience` - Computes average intensity per frame and exports postrin/negtrin based on configurable thresholds **Quale-Iface-API**: `pcloudAmbience` - Delivers per-dagram average intensity floats; postrin/negtrin binding and passband-style aggregation are being revised (see intrinsic parameters below).
**Intrinsic Stimuli Support** (for pcloudAmbience quale-iface-api): **Intrinsic Stimuli Support** (for pcloudAmbience quale-iface-api):
The `pcloudAmbience` quale-iface-api exports both a postrin and a negtrin whose The `pcloudAmbience` quale-iface-api is intended to export both a postrin and a negtrin whose
thresholds are configurable via standard quale-iface-api-params: thresholds are configurable via standard quale-iface-api-params:
- **Postrin interest threshold**: Configurable via `postrin-interest-[percentage|pc|threshold|thresh|thr]` - **Postrin interest threshold**: Configurable via `postrin-interest-[percentage|pc|threshold|thresh|thr]`
- **Negtrin interest threshold**: Configurable via `negtrin-interest-[percentage|pc|threshold|thresh|thr]` - **Negtrin interest threshold**: Configurable via `negtrin-interest-[percentage|pc|threshold|thresh|thr]`
@@ -165,7 +165,7 @@ The `livoxProto1` provider accepts the following parameters:
| Stim Feature | Stim-Buff-API | Quale-Iface-API | Description | | Stim Feature | Stim-Buff-API | Quale-Iface-API | Description |
|--------------|---------------|----------------|-------------| |--------------|---------------|----------------|-------------|
| Point Cloud Intensity | `livoxGen1-pcloudIntensity` | `pcloudIntensity` | Light intensity/reflectivity data | | Point Cloud Intensity | `livoxGen1-pcloudIntensity` | `pcloudIntensity` | Light intensity/reflectivity data |
| Point Cloud Ambience | `livoxGen1-pcloud` | `pcloudAmbience` | High-intensity point count per slot | | Point Cloud Ambience | `livoxGen1-pcloud` | `pcloudAmbience` | Per-dagram average intensity vector (`float` × `n-dgrams-per-frame`) |
| Point Cloud Coordinates | `livoxGen1-pcloud` | `pcloud` | Spatial coordinate data | | Point Cloud Coordinates | `livoxGen1-pcloud` | `pcloud` | Spatial coordinate data |
| Gyroscope | `livoxGen1-gyro` | `gyro` | Angular velocity measurements | | Gyroscope | `livoxGen1-gyro` | `gyro` | Angular velocity measurements |
| Accelerometer | `livoxGen1-accel` | `accel` | Linear acceleration measurements | | Accelerometer | `livoxGen1-accel` | `accel` | Linear acceleration measurements |
-28
View File
@@ -1,28 +0,0 @@
#ifndef _PCLOUD_AMBIENCE_STENCIL_H
#define _PCLOUD_AMBIENCE_STENCIL_H
#include <cstdint>
#include <cstddef>
#include <user/stencil.h>
namespace smo {
namespace stim_buff {
/**
* PcloudAmbienceStencil represents stencils for point cloud ambience data.
* This is a base class for device-specific implementations.
*/
class PcloudAmbienceStencil
: public smo::cologex::Stencil
{
public:
typedef uint32_t PcloudAmbienceStimulusValue;
PcloudAmbienceStencil() = default;
virtual ~PcloudAmbienceStencil() = default;
};
} // namespace stim_buff
} // namespace smo
#endif // _PCLOUD_AMBIENCE_STENCIL_H
+3 -3
View File
@@ -25,7 +25,7 @@ __kernel void collate(
__global uchar* assembly, __global uchar* assembly,
__global float* collation, __global float* collation,
__global float* intensityBuffer, __global float* intensityBuffer,
__global float* averageIntensityBuffer, __global float* ambienceBuffer,
uint slotStride, uint slotStride,
uint nPointsPerSlot, uint nPointsPerSlot,
uint nDgramsPerFrame) uint nDgramsPerFrame)
@@ -368,9 +368,9 @@ __kernel void collate(
// Unsupported data types are silently ignored // Unsupported data types are silently ignored
// Write average intensity for this work item (once at the end) // Write average intensity for this work item (once at the end)
if (averageIntensityBuffer != NULL) if (ambienceBuffer != NULL)
{ {
averageIntensityBuffer[slotIndex] = (validPointCount > 0) ? ambienceBuffer[slotIndex] = (validPointCount > 0) ?
(intensitySum / (float)validPointCount) : 0.0f; (intensitySum / (float)validPointCount) : 0.0f;
} }
} }
@@ -1,63 +0,0 @@
#ifndef _LG1_PCLOUD_AMBIENCE_STENCIL_H
#define _LG1_PCLOUD_AMBIENCE_STENCIL_H
#include "livoxGen1.h"
#include <user/pcloudAmbienceStencil.h>
#include <user/stencil.h>
namespace smo {
namespace stim_buff {
/**
* LG1PcloudAmbienceStencil represents Livox Gen1-specific stencils for
* ambience data. It holds a single RangeDescriptor with stimulusBufferSpot=0
* and nContiguousSpots=1.
*/
class LG1PcloudAmbienceStencil
: public PcloudAmbienceStencil
{
public:
explicit LG1PcloudAmbienceStencil()
: PcloudAmbienceStencil(),
rangeDescriptor{0, 1}
{}
~LG1PcloudAmbienceStencil() = default;
// Implement pure virtual functions from Stencil
bool hasData() const override
{
return true;
}
size_t getRelevantCount() const override
{
return rangeDescriptor.nContiguousSpots;
}
bool isRelevant(size_t offset) const override
{
return (offset >= rangeDescriptor.stimulusBufferSpot &&
offset < (rangeDescriptor.stimulusBufferSpot
+ rangeDescriptor.nContiguousSpots));
}
size_t getNRangeDescriptors() const override
{
return 1;
}
bool buildStencilMetadata() override
{
// Metadata is already built (single fixed descriptor)
return true;
}
public:
smo::cologex::Stencil::RangeDescriptor rangeDescriptor;
};
} // namespace stim_buff
} // namespace smo
#endif // _LG1_PCLOUD_AMBIENCE_STENCIL_H
@@ -35,10 +35,8 @@ OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(
computeDevice(nullptr), computeDevice(nullptr),
clAssemblyBufferClBuffer(nullptr), clAssemblyBufferClBuffer(nullptr),
clCollationBufferClBuffer(nullptr), clCollationBufferClBuffer(nullptr),
clAverageIntensityBufferClBuffer(nullptr),
clAssemblyBuffer(nullptr), clAssemblyBuffer(nullptr),
clCollationBuffer(nullptr), clCollationBuffer(nullptr),
clAverageIntensityBuffer(nullptr),
shouldAcceptRequests(false), shouldAcceptRequests(false),
compactIsRunning(false), compactIsRunning(false),
collateIsRunning(false), collateIsRunning(false),
@@ -47,11 +45,8 @@ assemblyBufferPtr(nullptr),
assemblyBufferSize(0), assemblyBufferSize(0),
collationBufferPtr(nullptr), collationBufferPtr(nullptr),
collationBufferSize(0), collationBufferSize(0),
averageIntensityBufferPtr(nullptr),
averageIntensityBufferSize(0),
mappedAssemblyBuffer(nullptr), mappedAssemblyBuffer(nullptr),
mappedCollationBuffer(nullptr), mappedCollationBuffer(nullptr),
mappedAverageIntensityBuffer(nullptr),
frameAssemblyDesc(nullptr) frameAssemblyDesc(nullptr)
{ {
} }
@@ -90,15 +85,11 @@ bool OpenClCollatingAndMeshingEngine::setup()
// Get StagingBuffer memory pointers from parent // Get StagingBuffer memory pointers from parent
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec(); struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec(); struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
assemblyBufferPtr = assemblyIov.iov_base; assemblyBufferPtr = assemblyIov.iov_base;
assemblyBufferSize = assemblyIov.iov_len; assemblyBufferSize = assemblyIov.iov_len;
collationBufferPtr = collationIov.iov_base; collationBufferPtr = collationIov.iov_base;
collationBufferSize = collationIov.iov_len; collationBufferSize = collationIov.iov_len;
averageIntensityBufferPtr = averageIntensityIov.iov_base;
averageIntensityBufferSize = averageIntensityIov.iov_len;
// Get FrameAssemblyDesc from assembly buffer // Get FrameAssemblyDesc from assembly buffer
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>( frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
@@ -140,28 +131,13 @@ bool OpenClCollatingAndMeshingEngine::setup()
return false; return false;
} }
auto wip_clAverageIntensityBufferClBuffer = smoHooksPtr
->ComputeManager_createUseHostPtrBuffer(
averageIntensityBufferPtr, averageIntensityBufferSize,
CL_MEM_WRITE_ONLY);
if (!wip_clAverageIntensityBufferClBuffer)
{
std::cerr << __func__ << ": failed to create average intensity buffer"
<< std::endl;
return false;
}
// Cache cl_mem handles for the device we're using // Cache cl_mem handles for the device we're using
cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice); ->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice); ->getAssociatedBufferHandleForDevice(wip_computeDevice);
cl_mem wip_clAverageIntensityBuffer = wip_clAverageIntensityBufferClBuffer
->getAssociatedBufferHandleForDevice(wip_computeDevice);
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer if (!wip_clAssemblyBuffer || !wip_clCollationBuffer)
|| !wip_clAverageIntensityBuffer)
{ {
std::cerr << __func__ << ": failed to get buffer handles for device" std::cerr << __func__ << ": failed to get buffer handles for device"
<< std::endl; << std::endl;
@@ -186,10 +162,8 @@ bool OpenClCollatingAndMeshingEngine::setup()
computeDevice = wip_computeDevice; computeDevice = wip_computeDevice;
clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer; clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer;
clCollationBufferClBuffer = wip_clCollationBufferClBuffer; clCollationBufferClBuffer = wip_clCollationBufferClBuffer;
clAverageIntensityBufferClBuffer = wip_clAverageIntensityBufferClBuffer;
clAssemblyBuffer = wip_clAssemblyBuffer; clAssemblyBuffer = wip_clAssemblyBuffer;
clCollationBuffer = wip_clCollationBuffer; clCollationBuffer = wip_clCollationBuffer;
clAverageIntensityBuffer = wip_clAverageIntensityBuffer;
slotCompactorProgram = std::move(wip_slotCompactorProgram); slotCompactorProgram = std::move(wip_slotCompactorProgram);
collateProgram = std::move(wip_collateProgram); collateProgram = std::move(wip_collateProgram);
slotCompactorKernel = std::move(wip_slotCompactorKernel); slotCompactorKernel = std::move(wip_slotCompactorKernel);
@@ -253,12 +227,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Release OpenCL buffers via smo hooks // Release OpenCL buffers via smo hooks
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer) if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
{ {
if (clAverageIntensityBufferClBuffer)
{
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
clAverageIntensityBufferClBuffer);
clAverageIntensityBufferClBuffer.reset();
}
if (clCollationBufferClBuffer) if (clCollationBufferClBuffer)
{ {
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer( smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
@@ -275,7 +243,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
// Reset cached cl_mem handles // Reset cached cl_mem handles
clCollationBuffer = nullptr; clCollationBuffer = nullptr;
clAverageIntensityBuffer = nullptr;
clAssemblyBuffer = nullptr; clAssemblyBuffer = nullptr;
// Release kernels and programs (handled automatically by unique_ptr destructors) // Release kernels and programs (handled automatically by unique_ptr destructors)
@@ -301,8 +268,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
assemblyBufferSize = 0; assemblyBufferSize = 0;
collationBufferPtr = nullptr; collationBufferPtr = nullptr;
collationBufferSize = 0; collationBufferSize = 0;
averageIntensityBufferPtr = nullptr;
averageIntensityBufferSize = 0;
frameAssemblyDesc = nullptr; frameAssemblyDesc = nullptr;
} }
@@ -409,15 +374,11 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
auto validateBuffers = [this]() { auto validateBuffers = [this]() {
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec(); struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
struct iovec collationIov = parent.collationBuffer.getClEngineIovec(); struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
struct iovec averageIntensityIov = parent.averageIntensityBuffer
.getClEngineIovec();
if (assemblyIov.iov_base != assemblyBufferPtr if (assemblyIov.iov_base != assemblyBufferPtr
|| assemblyIov.iov_len != assemblyBufferSize || assemblyIov.iov_len != assemblyBufferSize
|| collationIov.iov_base != collationBufferPtr || collationIov.iov_base != collationBufferPtr
|| collationIov.iov_len != collationBufferSize || collationIov.iov_len != collationBufferSize)
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
{ {
throw std::runtime_error( throw std::runtime_error(
std::string(__func__) + ": buffer mismatch - buffers have changed"); std::string(__func__) + ": buffer mismatch - buffers have changed");
@@ -454,14 +415,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
} }
unmapCollationBuffer(); unmapCollationBuffer();
if (!mapAverageIntensityBuffer(CL_MAP_WRITE))
{
std::cerr << __func__ << ": failed to map average intensity buffer"
<< std::endl;
return false;
}
unmapAverageIntensityBuffer();
// Map/unmap intensity buffer if it exists // Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value()) if (intensityStimFrame.has_value())
@@ -486,6 +439,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
} }
} }
// Map/unmap ambience stim frame buffer (collate writes per-slot averages here)
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (!mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
{
std::cerr << __func__ << ": failed to map ambience buffer"
<< std::endl;
return false;
}
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
// Calculate global work size (just num slots in the frame) // Calculate global work size (just num slots in the frame)
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots); size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
@@ -708,21 +684,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
return false; return false;
} }
// Set average intensity buffer argument (arg 3) // Set ambience buffer argument (arg 3): acquired PcloudAmbience StimulusFrame
/** EXPLANATION: cl_mem ambienceClBufferArg = nullptr;
* We only pass the average intensity buffer argument to the collate kernel if (ambienceStimFrame.has_value())
* when ambienceStimFrame is present. This is because the collate kernel {
* only needs the average intensity buffer if ambience processing is StimulusFrame& ambienceFrame = ambienceStimFrame->get();
* requested (i.e., the ambience stimulus buffer is attached). If no const size_t needBytes = static_cast<size_t>(nDgramsPerFrame)
* ambienceStimFrame is supplied, we skip passing the buffer to avoid * sizeof(float);
* unnecessary work.
*/ if (ambienceFrame.slotDesc.nBytes < needBytes)
cl_mem averageIntensityClBuffer = nullptr; {
if (ambienceStimFrame.has_value()) { std::cerr << __func__ << ": ambience stim frame slot too small: "
averageIntensityClBuffer = clAverageIntensityBuffer; << ambienceFrame.slotDesc.nBytes << " < " << needBytes
<< std::endl;
return false;
}
ambienceClBufferArg = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
} }
err = clSetKernelArg( err = clSetKernelArg(
collateKernel.get(), 3, sizeof(cl_mem), &averageIntensityClBuffer); collateKernel.get(), 3, sizeof(cl_mem), &ambienceClBufferArg);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
@@ -804,8 +785,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame, std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
bool isFinalizing) bool isFinalizing)
{ {
(void)ambienceStimFrame;
cl_map_flags mapFlags; cl_map_flags mapFlags;
/** EXPLANATION: /** EXPLANATION:
* Technically we should only need to do this if we plan to read the * Technically we should only need to do this if we plan to read the
@@ -818,10 +797,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
unmapCollationBuffer(); unmapCollationBuffer();
} }
if (mapAverageIntensityBuffer(mapFlags)) {
unmapAverageIntensityBuffer();
}
// Map/unmap intensity buffer if it exists // Map/unmap intensity buffer if it exists
if (intensityStimFrame.has_value()) if (intensityStimFrame.has_value())
{ {
@@ -841,6 +816,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
} }
} }
// Sync GPU writes into ambience stim frame host backing store
if (ambienceStimFrame.has_value())
{
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
->getAssociatedBufferHandleForDevice(computeDevice);
if (ambienceClBuffer)
{
void* mappedAmbienceBuffer = nullptr;
if (mapBuffer(
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
CL_MAP_READ, mappedAmbienceBuffer))
{
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
}
}
}
clFlush(computeDevice->commandQueue); clFlush(computeDevice->commandQueue);
// Stop only collate kernel // Stop only collate kernel
@@ -962,60 +956,6 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
return true; return true;
} }
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
cl_map_flags mapFlags
)
{
return mapBuffer(
clAverageIntensityBuffer, averageIntensityBufferSize, mapFlags,
mappedAverageIntensityBuffer);
}
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
{
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
mappedAverageIntensityBuffer = nullptr;
return true;
}
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
uint32_t nSucceeded)
{
if (!ambienceStimFrame.has_value()) { return; }
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
if (!ambienceBuff) { return; }
const auto& ambienceCountComparator = ambienceBuff->ambienceCountComparator;
// Read average intensity values from averageIntensityBuffer
float* averageIntensityAverages = reinterpret_cast<float*>(
averageIntensityBufferPtr);
uint32_t ambiencePassbandCount = 0;
if (ambienceCountComparator.has_value())
{
// Count frames whose average intensity matches the configured comparator.
for (uint32_t i = 0; i < nSucceeded; ++i)
{
float avg = averageIntensityAverages[i];
if (ambienceCountComparator.value()(avg)) {
++ambiencePassbandCount;
}
}
}
// Write the ambience count to the ambienceStimFrame
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
using PcloudAmbienceStimVal = PcloudAmbienceStencil
::PcloudAmbienceStimulusValue;
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
ambienceValue[0] = ambiencePassbandCount;
}
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
: public sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn> : public sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
{ {
@@ -1191,13 +1131,6 @@ public:
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load(); uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
// Produce ambience frame if ambience buffer is attached
if (context->ambienceStimFrame.has_value())
{
engine.produceAmbienceStimulusFrame(
context->ambienceStimFrame, nSucceeded);
}
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode); int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram( size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
returnMode); returnMode);
@@ -1218,46 +1151,10 @@ public:
} }
} }
} }
(void)highIntensityCount;
#if 0 #if 0
// Print all averages above thresholds from average intensity buffer // Legacy debug: ambience floats live in ambienceStimFrame after collate
if (context->ambienceStimFrame.has_value())
{
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
const auto& ambienceCountComparator =
ambienceBuff->ambienceCountComparator;
uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold;
float* averageIntensityAverages = reinterpret_cast<float*>(
engine.averageIntensityBufferPtr);
// Count frames that met the postrin threshold
uint32_t framesMetThreshold = 0;
for (uint32_t i = 0; i < nSucceeded; ++i)
{
float avg = averageIntensityAverages[i];
if (ambienceCountComparator(avg)) {
++framesMetThreshold;
}
}
// Read the stimFrame value (ambience count)
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
using PcloudAmbienceStimVal = PcloudAmbienceStencil
::PcloudAmbienceStimulusValue;
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
PcloudAmbienceStimVal stimFrameValue = ambienceValue[0];
bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold);
std::cout << __func__ << ": frames met threshold=" << framesMetThreshold
<< ", stimFrame value=" << stimFrameValue
<< ", postrin threshold=" << postrinThreshold
<< ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no")
<< std::endl;
}
std::cout << __func__ << ": intensityRingBufferIndex=" std::cout << __func__ << ": intensityRingBufferIndex="
<< (context->intensityStimFrame.has_value() ? << (context->intensityStimFrame.has_value() ?
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX) context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
@@ -108,11 +108,6 @@ public:
std::chrono::milliseconds getCompactKernelDuration() const; std::chrono::milliseconds getCompactKernelDuration() const;
std::chrono::milliseconds getCollateKernelDuration() const; std::chrono::milliseconds getCollateKernelDuration() const;
// Produce ambience frame from average intensity data
void produceAmbienceStimulusFrame(
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
uint32_t nSucceeded);
private: private:
PcloudStimulusProducer& parent; PcloudStimulusProducer& parent;
@@ -126,11 +121,9 @@ private:
// OpenCL buffers (managed by ComputeManager) // OpenCL buffers (managed by ComputeManager)
std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer; std::shared_ptr<smo::compute::ClBuffer> clAssemblyBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer; std::shared_ptr<smo::compute::ClBuffer> clCollationBufferClBuffer;
std::shared_ptr<smo::compute::ClBuffer> clAverageIntensityBufferClBuffer;
// Cached cl_mem handles for the device we're using // Cached cl_mem handles for the device we're using
cl_mem clAssemblyBuffer; cl_mem clAssemblyBuffer;
cl_mem clCollationBuffer; cl_mem clCollationBuffer;
cl_mem clAverageIntensityBuffer;
// State tracking // State tracking
sscl::SpinLock shouldAcceptRequestsLock; sscl::SpinLock shouldAcceptRequestsLock;
@@ -145,12 +138,9 @@ private:
size_t assemblyBufferSize; size_t assemblyBufferSize;
void* collationBufferPtr; void* collationBufferPtr;
size_t collationBufferSize; size_t collationBufferSize;
void* averageIntensityBufferPtr;
size_t averageIntensityBufferSize;
// Mapped buffer pointers (for zero-copy synchronization) // Mapped buffer pointers (for zero-copy synchronization)
void* mappedAssemblyBuffer; void* mappedAssemblyBuffer;
void* mappedCollationBuffer; void* mappedCollationBuffer;
void* mappedAverageIntensityBuffer;
// Frame descriptor (cached from setup) // Frame descriptor (cached from setup)
std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc; std::shared_ptr<FrameAssemblyDesc> frameAssemblyDesc;
@@ -196,8 +186,6 @@ private:
bool unmapAssemblyBuffer(); bool unmapAssemblyBuffer();
bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ); bool mapCollationBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapCollationBuffer(); bool unmapCollationBuffer();
bool mapAverageIntensityBuffer(cl_map_flags mapFlags = CL_MAP_READ);
bool unmapAverageIntensityBuffer();
// Forward declaration for continuation class // Forward declaration for continuation class
class CompactCollateAndMeshFrameReq; class CompactCollateAndMeshFrameReq;
@@ -3,7 +3,6 @@
#include <memory> #include <memory>
#include <cstdint> #include <cstdint>
#include <list>
#include <cstddef> #include <cstddef>
#include <optional> #include <optional>
#include <vector> #include <vector>
@@ -13,7 +12,6 @@
#include <user/deviceAttachmentSpec.h> #include <user/deviceAttachmentSpec.h>
#include <user/intrinThresholdParams.h> #include <user/intrinThresholdParams.h>
#include "pcloudAmbienceQualeIfaceApi.h" #include "pcloudAmbienceQualeIfaceApi.h"
#include "lg1PcloudAmbienceStencil.h"
namespace smo { namespace smo {
namespace stim_buff { namespace stim_buff {
@@ -175,12 +173,12 @@ public:
const StagingBuffer::IOEngineConstraints& outputEngineConstraints, const StagingBuffer::IOEngineConstraints& outputEngineConstraints,
const SmoCallbacks& callbacks, const SmoCallbacks& callbacks,
cl_mem_flags flags, cl_mem_flags flags,
size_t nStencils_, size_t nDgramsPerFrame_) size_t nDgramsPerFrame_)
: StimulusBuffer( : StimulusBuffer(
parent, deviceAttachmentSpec, histbuffMs, parent, deviceAttachmentSpec, histbuffMs,
inputEngineConstraints, outputEngineConstraints, inputEngineConstraints, outputEngineConstraints,
callbacks, flags), callbacks, flags),
nStencils(nStencils_) nDgramsPerFrame(nDgramsPerFrame_)
{ {
intrin::validateNoForbiddenUnitlessIntrinParams( intrin::validateNoForbiddenUnitlessIntrinParams(
deviceAttachmentSpec->qualeIfaceApiParams); deviceAttachmentSpec->qualeIfaceApiParams);
@@ -195,11 +193,6 @@ public:
deviceAttachmentSpec); deviceAttachmentSpec);
validateAmbienceIntrinComparatorConfig( validateAmbienceIntrinComparatorConfig(
intrinStatus, ambienceCountComparator); intrinStatus, ambienceCountComparator);
// Construct stencils and add to list (FIFO behavior)
for (size_t i = 0; i < nStencils; ++i) {
stencils.emplace_back();
}
} }
~PcloudAmbienceStimulusBuffer() = default; ~PcloudAmbienceStimulusBuffer() = default;
@@ -223,8 +216,7 @@ public:
uint32_t intrinInterestPercentage; uint32_t intrinInterestPercentage;
uint32_t intrinInterestThreshold; uint32_t intrinInterestThreshold;
std::optional<ParamComparator> ambienceCountComparator; std::optional<ParamComparator> ambienceCountComparator;
size_t nStencils; size_t nDgramsPerFrame;
std::list<LG1PcloudAmbienceStencil> stencils;
}; };
} // namespace stim_buff } // namespace stim_buff
@@ -9,7 +9,6 @@
#include <spinscale/asynchronousLoop.h> #include <spinscale/asynchronousLoop.h>
#include <user/stimulusFrame.h> #include <user/stimulusFrame.h>
#include <user/frameAssemblyDesc.h> #include <user/frameAssemblyDesc.h>
#include <user/pcloudAmbienceStencil.h>
#include <livoxProto1/device.h> #include <livoxProto1/device.h>
#include "livoxGen1.h" #include "livoxGen1.h"
#include "pcloudStimulusProducer.h" #include "pcloudStimulusProducer.h"
@@ -54,20 +53,14 @@ static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints(
// framePadToNBytes (pointer size) // framePadToNBytes (pointer size)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE))); static_cast<size_t>(sysconf(_SC_PAGE_SIZE)));
/* IOEngineConstraints for PcloudAmbienceStimulusBuffer's StagingBuffer, which
* backs SpMcRingBuffer (one StimulusFrame per ring slot). Not the OpenCL
* collating engine's assembly/collation buffers those use assemblyBuffer /
* collationBuffer above. slotPadToNBytes here is the byte size of each ringbuff
* slot: nDgramsPerStagingBufferFrame floats (set in ctor).
*/
static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints( static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints(
// slotStartAlignmentByteVal (sizeof(void*))
sizeof(PcloudAmbienceStencil::PcloudAmbienceStimulusValue),
// slotPadToNBytes (sizeof(PcloudAmbienceStimulusValue))
sizeof(PcloudAmbienceStencil::PcloudAmbienceStimulusValue),
// frameStartAlignmentByteVal (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// framePadToNBytes (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)));
static StagingBuffer::IOEngineConstraints openClAverageIntensityConstraints(
// slotStartAlignmentByteVal (sizeof(float))
sizeof(float), sizeof(float),
// slotPadToNBytes (sizeof(float))
sizeof(float), sizeof(float),
// frameStartAlignmentByteVal (page alignment) // frameStartAlignmentByteVal (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)), static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
@@ -96,12 +89,6 @@ collationBuffer(
StagingBuffer::IOEngineConstraints::openClInputConstraints, StagingBuffer::IOEngineConstraints::openClInputConstraints,
nDgramsPerStagingBufferFrame), nDgramsPerStagingBufferFrame),
collationBufferMlockPinner(collationBuffer.makeMlockPinner()), collationBufferMlockPinner(collationBuffer.makeMlockPinner()),
averageIntensityBuffer(
openClAverageIntensityConstraints,
openClAverageIntensityConstraints,
nDgramsPerStagingBufferFrame),
averageIntensityBufferMlockPinner(
averageIntensityBuffer.makeMlockPinner()),
pcloudFrameDumper(deviceAttachmentSpec), pcloudFrameDumper(deviceAttachmentSpec),
tempStimulusFrameMem(0), tempStimulusFrameMem(0),
tempStimulusFrame( tempStimulusFrame(
@@ -111,6 +98,10 @@ tempStimulusFrame(
sizeof(tempStimulusFrameMem)}, sizeof(tempStimulusFrameMem)},
*smoHooksPtr, 0, SIZE_MAX) *smoHooksPtr, 0, SIZE_MAX)
{ {
// See comment in openClAmbienceInputConstraints above.
openClAmbienceInputConstraints.slotPadToNBytes =
nDgramsPerStagingBufferFrame * sizeof(float);
if (smoHooksPtr->OptionParser_getOptions().verbose) if (smoHooksPtr->OptionParser_getOptions().verbose)
{ {
std::cout << __func__ << ": assembly buffer : " std::cout << __func__ << ": assembly buffer : "
@@ -291,12 +282,10 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer(
// Parse qualeIfaceApi to determine buffer type // Parse qualeIfaceApi to determine buffer type
const std::string& qualeIfaceApi = deviceAttachmentSpec->qualeIfaceApi; const std::string& qualeIfaceApi = deviceAttachmentSpec->qualeIfaceApi;
// Calculate nPointsPerDgram based on return mode
size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
static_cast<int>(device->currentReturnMode));
if (qualeIfaceApi == "mesh") if (qualeIfaceApi == "mesh")
{ {
size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
static_cast<int>(device->currentReturnMode));
/* Calculate slotStrideNBytes: /* Calculate slotStrideNBytes:
* nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 3 * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 3
*/ */
@@ -318,6 +307,8 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer(
} }
else if (qualeIfaceApi == "pcloudIntensity") else if (qualeIfaceApi == "pcloudIntensity")
{ {
size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
static_cast<int>(device->currentReturnMode));
/* Calculate slotStrideNBytes: /* Calculate slotStrideNBytes:
* nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 1 * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 1
*/ */
@@ -340,27 +331,17 @@ PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer(
} }
else if (qualeIfaceApi == "pcloudAmbience") else if (qualeIfaceApi == "pcloudAmbience")
{ {
// Parse n-stencils from qualeIfaceApiParams auto ambienceStimBuff = std::make_shared<PcloudAmbienceStimulusBuffer>(
const std::vector<std::string> nStencilsParamNames = {
"n-stencils"
};
int nStencilsInt = device::DeviceAttachmentSpec
::parseOptionalParamAsIntWithSynonyms(
deviceAttachmentSpec->qualeIfaceApiParams,
nStencilsParamNames, 1);
size_t nStencils = static_cast<size_t>(nStencilsInt);
auto ambienceBuffer = std::make_shared<PcloudAmbienceStimulusBuffer>(
*this, deviceAttachmentSpec, histbuffMs, *this, deviceAttachmentSpec, histbuffMs,
openClAmbienceInputConstraints, openClAmbienceInputConstraints, openClAmbienceInputConstraints, openClAmbienceInputConstraints,
*smoHooksPtr, CL_MEM_READ_WRITE, *smoHooksPtr, CL_MEM_READ_WRITE,
nStencils, this->nDgramsPerStagingBufferFrame); this->nDgramsPerStagingBufferFrame);
this->stop(); this->stop();
addAttachedStimulusBufferIfNotExists(ambienceBuffer); addAttachedStimulusBufferIfNotExists(ambienceStimBuff);
ambienceStimulusBuffer.store(ambienceBuffer, std::memory_order_release); ambienceStimulusBuffer.store(ambienceStimBuff, std::memory_order_release);
this->start(); this->start();
return ambienceBuffer; return ambienceStimBuff;
} }
else else
{ {
@@ -96,9 +96,6 @@ public:
IoUringAssemblyEngine ioUringAssemblyEngine; IoUringAssemblyEngine ioUringAssemblyEngine;
StagingBuffer collationBuffer; StagingBuffer collationBuffer;
std::unique_ptr<StagingBuffer::MlockPinner> collationBufferMlockPinner; std::unique_ptr<StagingBuffer::MlockPinner> collationBufferMlockPinner;
StagingBuffer averageIntensityBuffer;
std::unique_ptr<StagingBuffer::MlockPinner>
averageIntensityBufferMlockPinner;
LivoxPcloudFrameDumper pcloudFrameDumper; LivoxPcloudFrameDumper pcloudFrameDumper;
size_t tempStimulusFrameMem; size_t tempStimulusFrameMem;
StimulusFrame tempStimulusFrame; StimulusFrame tempStimulusFrame;