OClCollMeshEngn: produce ambience into stimBuff frames directly
This commit is contained in:
@@ -35,10 +35,8 @@ OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(
|
||||
computeDevice(nullptr),
|
||||
clAssemblyBufferClBuffer(nullptr),
|
||||
clCollationBufferClBuffer(nullptr),
|
||||
clAverageIntensityBufferClBuffer(nullptr),
|
||||
clAssemblyBuffer(nullptr),
|
||||
clCollationBuffer(nullptr),
|
||||
clAverageIntensityBuffer(nullptr),
|
||||
shouldAcceptRequests(false),
|
||||
compactIsRunning(false),
|
||||
collateIsRunning(false),
|
||||
@@ -47,11 +45,8 @@ assemblyBufferPtr(nullptr),
|
||||
assemblyBufferSize(0),
|
||||
collationBufferPtr(nullptr),
|
||||
collationBufferSize(0),
|
||||
averageIntensityBufferPtr(nullptr),
|
||||
averageIntensityBufferSize(0),
|
||||
mappedAssemblyBuffer(nullptr),
|
||||
mappedCollationBuffer(nullptr),
|
||||
mappedAverageIntensityBuffer(nullptr),
|
||||
frameAssemblyDesc(nullptr)
|
||||
{
|
||||
}
|
||||
@@ -90,15 +85,11 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
||||
// Get StagingBuffer memory pointers from parent
|
||||
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
||||
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
|
||||
struct iovec averageIntensityIov = parent.averageIntensityBuffer
|
||||
.getClEngineIovec();
|
||||
|
||||
assemblyBufferPtr = assemblyIov.iov_base;
|
||||
assemblyBufferSize = assemblyIov.iov_len;
|
||||
collationBufferPtr = collationIov.iov_base;
|
||||
collationBufferSize = collationIov.iov_len;
|
||||
averageIntensityBufferPtr = averageIntensityIov.iov_base;
|
||||
averageIntensityBufferSize = averageIntensityIov.iov_len;
|
||||
|
||||
// Get FrameAssemblyDesc from assembly buffer
|
||||
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
|
||||
@@ -140,28 +131,13 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
||||
return false;
|
||||
}
|
||||
|
||||
auto wip_clAverageIntensityBufferClBuffer = smoHooksPtr
|
||||
->ComputeManager_createUseHostPtrBuffer(
|
||||
averageIntensityBufferPtr, averageIntensityBufferSize,
|
||||
CL_MEM_WRITE_ONLY);
|
||||
|
||||
if (!wip_clAverageIntensityBufferClBuffer)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to create average intensity buffer"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Cache cl_mem handles for the device we're using
|
||||
cl_mem wip_clAssemblyBuffer = wip_clAssemblyBufferClBuffer
|
||||
->getAssociatedBufferHandleForDevice(wip_computeDevice);
|
||||
cl_mem wip_clCollationBuffer = wip_clCollationBufferClBuffer
|
||||
->getAssociatedBufferHandleForDevice(wip_computeDevice);
|
||||
cl_mem wip_clAverageIntensityBuffer = wip_clAverageIntensityBufferClBuffer
|
||||
->getAssociatedBufferHandleForDevice(wip_computeDevice);
|
||||
|
||||
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer
|
||||
|| !wip_clAverageIntensityBuffer)
|
||||
if (!wip_clAssemblyBuffer || !wip_clCollationBuffer)
|
||||
{
|
||||
std::cerr << __func__ << ": failed to get buffer handles for device"
|
||||
<< std::endl;
|
||||
@@ -186,10 +162,8 @@ bool OpenClCollatingAndMeshingEngine::setup()
|
||||
computeDevice = wip_computeDevice;
|
||||
clAssemblyBufferClBuffer = wip_clAssemblyBufferClBuffer;
|
||||
clCollationBufferClBuffer = wip_clCollationBufferClBuffer;
|
||||
clAverageIntensityBufferClBuffer = wip_clAverageIntensityBufferClBuffer;
|
||||
clAssemblyBuffer = wip_clAssemblyBuffer;
|
||||
clCollationBuffer = wip_clCollationBuffer;
|
||||
clAverageIntensityBuffer = wip_clAverageIntensityBuffer;
|
||||
slotCompactorProgram = std::move(wip_slotCompactorProgram);
|
||||
collateProgram = std::move(wip_collateProgram);
|
||||
slotCompactorKernel = std::move(wip_slotCompactorKernel);
|
||||
@@ -253,12 +227,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
||||
// Release OpenCL buffers via smo hooks
|
||||
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
||||
{
|
||||
if (clAverageIntensityBufferClBuffer)
|
||||
{
|
||||
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
|
||||
clAverageIntensityBufferClBuffer);
|
||||
clAverageIntensityBufferClBuffer.reset();
|
||||
}
|
||||
if (clCollationBufferClBuffer)
|
||||
{
|
||||
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
|
||||
@@ -275,7 +243,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
||||
|
||||
// Reset cached cl_mem handles
|
||||
clCollationBuffer = nullptr;
|
||||
clAverageIntensityBuffer = nullptr;
|
||||
clAssemblyBuffer = nullptr;
|
||||
|
||||
// Release kernels and programs (handled automatically by unique_ptr destructors)
|
||||
@@ -301,8 +268,6 @@ void OpenClCollatingAndMeshingEngine::finalize()
|
||||
assemblyBufferSize = 0;
|
||||
collationBufferPtr = nullptr;
|
||||
collationBufferSize = 0;
|
||||
averageIntensityBufferPtr = nullptr;
|
||||
averageIntensityBufferSize = 0;
|
||||
frameAssemblyDesc = nullptr;
|
||||
}
|
||||
|
||||
@@ -409,15 +374,11 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
auto validateBuffers = [this]() {
|
||||
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
||||
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
|
||||
struct iovec averageIntensityIov = parent.averageIntensityBuffer
|
||||
.getClEngineIovec();
|
||||
|
||||
if (assemblyIov.iov_base != assemblyBufferPtr
|
||||
|| assemblyIov.iov_len != assemblyBufferSize
|
||||
|| collationIov.iov_base != collationBufferPtr
|
||||
|| collationIov.iov_len != collationBufferSize
|
||||
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|
||||
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
|
||||
|| collationIov.iov_len != collationBufferSize)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
||||
@@ -454,14 +415,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
}
|
||||
|
||||
unmapCollationBuffer();
|
||||
if (!mapAverageIntensityBuffer(CL_MAP_WRITE))
|
||||
{
|
||||
std::cerr << __func__ << ": failed to map average intensity buffer"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
unmapAverageIntensityBuffer();
|
||||
|
||||
// Map/unmap intensity buffer if it exists
|
||||
if (intensityStimFrame.has_value())
|
||||
@@ -486,6 +439,29 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
||||
}
|
||||
}
|
||||
|
||||
// Map/unmap ambience stim frame buffer (collate writes per-slot averages here)
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (ambienceClBuffer)
|
||||
{
|
||||
void* mappedAmbienceBuffer = nullptr;
|
||||
if (!mapBuffer(
|
||||
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
|
||||
CL_MAP_WRITE_INVALIDATE_REGION, mappedAmbienceBuffer))
|
||||
{
|
||||
std::cerr << __func__ << ": failed to map ambience buffer"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate global work size (just num slots in the frame)
|
||||
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
||||
|
||||
@@ -708,21 +684,26 @@ bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set average intensity buffer argument (arg 3)
|
||||
/** EXPLANATION:
|
||||
* We only pass the average intensity buffer argument to the collate kernel
|
||||
* when ambienceStimFrame is present. This is because the collate kernel
|
||||
* only needs the average intensity buffer if ambience processing is
|
||||
* requested (i.e., the ambience stimulus buffer is attached). If no
|
||||
* ambienceStimFrame is supplied, we skip passing the buffer to avoid
|
||||
* unnecessary work.
|
||||
*/
|
||||
cl_mem averageIntensityClBuffer = nullptr;
|
||||
if (ambienceStimFrame.has_value()) {
|
||||
averageIntensityClBuffer = clAverageIntensityBuffer;
|
||||
// Set ambience buffer argument (arg 3): acquired PcloudAmbience StimulusFrame
|
||||
cl_mem ambienceClBufferArg = nullptr;
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
const size_t needBytes = static_cast<size_t>(nDgramsPerFrame)
|
||||
* sizeof(float);
|
||||
|
||||
if (ambienceFrame.slotDesc.nBytes < needBytes)
|
||||
{
|
||||
std::cerr << __func__ << ": ambience stim frame slot too small: "
|
||||
<< ambienceFrame.slotDesc.nBytes << " < " << needBytes
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ambienceClBufferArg = ambienceFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
}
|
||||
err = clSetKernelArg(
|
||||
collateKernel.get(), 3, sizeof(cl_mem), &averageIntensityClBuffer);
|
||||
collateKernel.get(), 3, sizeof(cl_mem), &ambienceClBufferArg);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -804,8 +785,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
||||
bool isFinalizing)
|
||||
{
|
||||
(void)ambienceStimFrame;
|
||||
|
||||
cl_map_flags mapFlags;
|
||||
/** EXPLANATION:
|
||||
* Technically we should only need to do this if we plan to read the
|
||||
@@ -818,10 +797,6 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
unmapCollationBuffer();
|
||||
}
|
||||
|
||||
if (mapAverageIntensityBuffer(mapFlags)) {
|
||||
unmapAverageIntensityBuffer();
|
||||
}
|
||||
|
||||
// Map/unmap intensity buffer if it exists
|
||||
if (intensityStimFrame.has_value())
|
||||
{
|
||||
@@ -841,6 +816,25 @@ void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
||||
}
|
||||
}
|
||||
|
||||
// Sync GPU writes into ambience stim frame host backing store
|
||||
if (ambienceStimFrame.has_value())
|
||||
{
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
cl_mem ambienceClBuffer = ambienceFrame.clBuffer
|
||||
->getAssociatedBufferHandleForDevice(computeDevice);
|
||||
|
||||
if (ambienceClBuffer)
|
||||
{
|
||||
void* mappedAmbienceBuffer = nullptr;
|
||||
if (mapBuffer(
|
||||
ambienceClBuffer, ambienceFrame.slotDesc.nBytes,
|
||||
CL_MAP_READ, mappedAmbienceBuffer))
|
||||
{
|
||||
unmapBuffer(ambienceClBuffer, mappedAmbienceBuffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clFlush(computeDevice->commandQueue);
|
||||
|
||||
// Stop only collate kernel
|
||||
@@ -962,60 +956,6 @@ bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
|
||||
return true;
|
||||
}
|
||||
|
||||
/** EXPLANATION:
 * Maps the average intensity buffer by forwarding to mapBuffer() with the
 * cached clAverageIntensityBuffer handle, averageIntensityBufferSize as the
 * length, and the caller-supplied mapFlags.
 *
 * mappedAverageIntensityBuffer is passed as the last argument; presumably
 * mapBuffer() stores the mapped host pointer there -- TODO confirm against
 * mapBuffer()'s definition, which is not visible here.
 *
 * @param mapFlags OpenCL map flags handed through to mapBuffer() unchanged.
 * @return Whatever mapBuffer() returns, converted to bool.
 */
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
|
||||
cl_map_flags mapFlags
|
||||
)
|
||||
{
|
||||
return mapBuffer(
|
||||
clAverageIntensityBuffer, averageIntensityBufferSize, mapFlags,
|
||||
mappedAverageIntensityBuffer);
|
||||
}
|
||||
|
||||
/** EXPLANATION:
 * Unmaps the average intensity buffer by calling unmapBuffer() with the
 * cached clAverageIntensityBuffer handle and the currently stored
 * mappedAverageIntensityBuffer pointer, then clears that stored pointer.
 *
 * @return Always true; any result of unmapBuffer() is not inspected here.
 */
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
|
||||
{
|
||||
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
|
||||
// Forget the mapped pointer so it is not reused after the unmap call.
mappedAverageIntensityBuffer = nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** EXPLANATION:
 * Counts how many of the first nSucceeded float values in the average
 * intensity buffer satisfy the ambience comparator, and stores that count
 * as the first value of the supplied ambience stimulus frame.
 *
 * Does nothing when ambienceStimFrame is empty, or when
 * parent.ambienceStimulusBuffer currently holds a null pointer.
 * If the ambience buffer has no ambienceCountComparator set, the count
 * written is 0.
 *
 * NOTE(review): this function does not itself check that
 * averageIntensityBufferPtr is non-null, that nSucceeded floats fit within
 * averageIntensityBufferSize, or that slotDesc.vaddr is non-null -- confirm
 * that callers guarantee these.
 *
 * @param ambienceStimFrame Optional reference to the frame that receives
 *	the count.
 * @param nSucceeded Number of leading float entries to examine.
 */
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
|
||||
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
||||
uint32_t nSucceeded)
|
||||
{
|
||||
if (!ambienceStimFrame.has_value()) { return; }
|
||||
|
||||
// Take a local shared_ptr copy of the currently attached ambience buffer.
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
|
||||
parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
|
||||
if (!ambienceBuff) { return; }
|
||||
|
||||
const auto& ambienceCountComparator = ambienceBuff->ambienceCountComparator;
|
||||
|
||||
// Read average intensity values from averageIntensityBuffer
|
||||
float* averageIntensityAverages = reinterpret_cast<float*>(
|
||||
averageIntensityBufferPtr);
|
||||
|
||||
uint32_t ambiencePassbandCount = 0;
|
||||
if (ambienceCountComparator.has_value())
|
||||
{
|
||||
// Count frames whose average intensity matches the configured comparator.
|
||||
for (uint32_t i = 0; i < nSucceeded; ++i)
|
||||
{
|
||||
float avg = averageIntensityAverages[i];
|
||||
if (ambienceCountComparator.value()(avg)) {
|
||||
++ambiencePassbandCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write the ambience count to the ambienceStimFrame
|
||||
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
||||
using PcloudAmbienceStimVal = PcloudAmbienceStencil
|
||||
::PcloudAmbienceStimulusValue;
|
||||
// The frame slot's vaddr is treated as an array of stimulus values; only
// element 0 is written.
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
|
||||
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
|
||||
ambienceValue[0] = ambiencePassbandCount;
|
||||
}
|
||||
|
||||
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
|
||||
: public sscl::PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
|
||||
{
|
||||
@@ -1191,13 +1131,6 @@ public:
|
||||
|
||||
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
|
||||
|
||||
// Produce ambience frame if ambience buffer is attached
|
||||
if (context->ambienceStimFrame.has_value())
|
||||
{
|
||||
engine.produceAmbienceStimulusFrame(
|
||||
context->ambienceStimFrame, nSucceeded);
|
||||
}
|
||||
|
||||
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
|
||||
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
|
||||
returnMode);
|
||||
@@ -1218,46 +1151,10 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
(void)highIntensityCount;
|
||||
|
||||
#if 0
|
||||
// Print all averages above thresholds from average intensity buffer
|
||||
if (context->ambienceStimFrame.has_value())
|
||||
{
|
||||
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
|
||||
engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
|
||||
const auto& ambienceCountComparator =
|
||||
ambienceBuff->ambienceCountComparator;
|
||||
uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold;
|
||||
|
||||
float* averageIntensityAverages = reinterpret_cast<float*>(
|
||||
engine.averageIntensityBufferPtr);
|
||||
// Count frames that met the postrin threshold
|
||||
uint32_t framesMetThreshold = 0;
|
||||
for (uint32_t i = 0; i < nSucceeded; ++i)
|
||||
{
|
||||
float avg = averageIntensityAverages[i];
|
||||
if (ambienceCountComparator(avg)) {
|
||||
++framesMetThreshold;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the stimFrame value (ambience count)
|
||||
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
|
||||
using PcloudAmbienceStimVal = PcloudAmbienceStencil
|
||||
::PcloudAmbienceStimulusValue;
|
||||
PcloudAmbienceStimVal* ambienceValue = reinterpret_cast<
|
||||
PcloudAmbienceStimVal*>(ambienceFrame.slotDesc.vaddr);
|
||||
PcloudAmbienceStimVal stimFrameValue = ambienceValue[0];
|
||||
|
||||
bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold);
|
||||
|
||||
std::cout << __func__ << ": frames met threshold=" << framesMetThreshold
|
||||
<< ", stimFrame value=" << stimFrameValue
|
||||
<< ", postrin threshold=" << postrinThreshold
|
||||
<< ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no")
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// Legacy debug: ambience floats live in ambienceStimFrame after collate
|
||||
std::cout << __func__ << ": intensityRingBufferIndex="
|
||||
<< (context->intensityStimFrame.has_value() ?
|
||||
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
|
||||
|
||||
Reference in New Issue
Block a user