280b6f7d1c
We modify the semantics/meaning of the ambience stim feature. It now represents the number of frames whose average intensity is below the ambienceLowVal. We can now implement the postrin as the event wherein the number of frames whose intensity <= ambienceLowVal exceeds postrin-interest-threshold.
1321 lines
37 KiB
C++
1321 lines
37 KiB
C++
#include <boostAsioLinkageFix.h>
|
|
#include <stdexcept>
|
|
#include <iostream>
|
|
#include <cstring>
|
|
#include <cstddef>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <algorithm>
|
|
#include <boost/system/error_code.hpp>
|
|
#include <boost/asio/deadline_timer.hpp>
|
|
#include <asynchronousContinuation.h>
|
|
#include <asynchronousBridge.h>
|
|
#include <callback.h>
|
|
#include <asynchronousLoop.h>
|
|
#include <componentThread.h>
|
|
#include <user/stimulusFrame.h>
|
|
#include <livoxProto1/device.h>
|
|
#include "livoxGen1.h"
|
|
#include "openClCollatingAndMeshingEngine.h"
|
|
#include "pcloudStimulusProducer.h"
|
|
#include "openClKernels.h"
|
|
#include <user/frameAssemblyDesc.h>
|
|
#include "ioUringAssemblyEngine.h"
|
|
#include <user/senseApiDesc.h>
|
|
|
|
extern const smo::stim_buff::SmoCallbacks* smoHooksPtr;
|
|
|
|
namespace smo {
|
|
namespace stim_buff {
|
|
|
|
OpenClCollatingAndMeshingEngine::OpenClCollatingAndMeshingEngine(
|
|
PcloudStimulusProducer& parent_)
|
|
: parent(parent_),
|
|
computeDevice(nullptr),
|
|
slotCompactorProgram(nullptr), collateProgram(nullptr),
|
|
slotCompactorKernel(nullptr), collateKernel(nullptr),
|
|
clAssemblyBufferClBuffer(nullptr),
|
|
clCollationBufferClBuffer(nullptr),
|
|
clAverageIntensityBufferClBuffer(nullptr),
|
|
clAssemblyBuffer(nullptr),
|
|
clCollationBuffer(nullptr),
|
|
clAverageIntensityBuffer(nullptr),
|
|
shouldAcceptRequests(false),
|
|
compactIsRunning(false),
|
|
collateIsRunning(false),
|
|
currentCompactKernelEvent(nullptr), currentCollateKernelEvent(nullptr),
|
|
assemblyBufferPtr(nullptr),
|
|
assemblyBufferSize(0),
|
|
collationBufferPtr(nullptr),
|
|
collationBufferSize(0),
|
|
averageIntensityBufferPtr(nullptr),
|
|
averageIntensityBufferSize(0),
|
|
mappedAssemblyBuffer(nullptr),
|
|
mappedCollationBuffer(nullptr),
|
|
mappedAverageIntensityBuffer(nullptr),
|
|
frameAssemblyDesc(nullptr)
|
|
{
|
|
}
|
|
|
|
OpenClCollatingAndMeshingEngine::~OpenClCollatingAndMeshingEngine()
|
|
{
|
|
finalize();
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::setup()
|
|
{
|
|
// Defensive check to prevent double-calling
|
|
{
|
|
SpinLock::Guard lock(shouldAcceptRequestsLock);
|
|
if (shouldAcceptRequests)
|
|
{
|
|
throw std::runtime_error(std::string(__func__) + ": setup() called "
|
|
"while already set up");
|
|
}
|
|
}
|
|
|
|
if (!smoHooksPtr || !smoHooksPtr->ComputeManager_getDevice)
|
|
{
|
|
std::cerr << __func__ << ": smo hooks not available" << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Get ComputeDevice from smo hooks
|
|
computeDevice = smoHooksPtr->ComputeManager_getDevice();
|
|
if (!computeDevice)
|
|
{
|
|
std::cerr << __func__ << ": failed to get compute device" << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Get StagingBuffer memory pointers from parent
|
|
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
|
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
|
|
struct iovec averageIntensityIov = parent.averageIntensityBuffer
|
|
.getClEngineIovec();
|
|
|
|
assemblyBufferPtr = assemblyIov.iov_base;
|
|
assemblyBufferSize = assemblyIov.iov_len;
|
|
collationBufferPtr = collationIov.iov_base;
|
|
collationBufferSize = collationIov.iov_len;
|
|
averageIntensityBufferPtr = averageIntensityIov.iov_base;
|
|
averageIntensityBufferSize = averageIntensityIov.iov_len;
|
|
|
|
// Get FrameAssemblyDesc from assembly buffer
|
|
frameAssemblyDesc = static_cast<std::shared_ptr<FrameAssemblyDesc>>(
|
|
parent.assemblyBuffer);
|
|
|
|
if (!frameAssemblyDesc || frameAssemblyDesc->slots.empty())
|
|
{
|
|
std::cerr << __func__ << ": invalid frame descriptor" << std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
// Create OpenCL buffers using smo hooks
|
|
if (!smoHooksPtr->ComputeManager_createUseHostPtrBuffer)
|
|
{
|
|
std::cerr << __func__ << ": createUseHostPtrBuffer hook not available"
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
clAssemblyBufferClBuffer = smoHooksPtr
|
|
->ComputeManager_createUseHostPtrBuffer(
|
|
assemblyBufferPtr, assemblyBufferSize, CL_MEM_READ_WRITE);
|
|
|
|
if (!clAssemblyBufferClBuffer)
|
|
{
|
|
std::cerr << __func__ << ": failed to create assembly buffer"
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
clCollationBufferClBuffer = smoHooksPtr
|
|
->ComputeManager_createUseHostPtrBuffer(
|
|
collationBufferPtr, collationBufferSize, CL_MEM_WRITE_ONLY);
|
|
|
|
if (!clCollationBufferClBuffer)
|
|
{
|
|
std::cerr << __func__ << ": failed to create collation buffer"
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
clAverageIntensityBufferClBuffer = smoHooksPtr
|
|
->ComputeManager_createUseHostPtrBuffer(
|
|
averageIntensityBufferPtr, averageIntensityBufferSize,
|
|
CL_MEM_WRITE_ONLY);
|
|
|
|
if (!clAverageIntensityBufferClBuffer)
|
|
{
|
|
std::cerr << __func__ << ": failed to create average intensity buffer"
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
// Cache cl_mem handles for the device we're using
|
|
clAssemblyBuffer = clAssemblyBufferClBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
clCollationBuffer = clCollationBufferClBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
clAverageIntensityBuffer = clAverageIntensityBufferClBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
|
|
if (!clAssemblyBuffer || !clCollationBuffer || !clAverageIntensityBuffer)
|
|
{
|
|
std::cerr << __func__ << ": failed to get buffer handles for device"
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
// Compile and prepare both kernels
|
|
if (!compileAndPrepareKernels()) {
|
|
goto cleanup;
|
|
}
|
|
|
|
clFlush(computeDevice->commandQueue);
|
|
clFinish(computeDevice->commandQueue);
|
|
shouldAcceptRequests = true;
|
|
return true;
|
|
|
|
cleanup:
|
|
finalize();
|
|
return false;
|
|
}
|
|
|
|
void OpenClCollatingAndMeshingEngine::finalize()
|
|
{
|
|
// Call stop() to set shouldAcceptRequests to false and get previous state
|
|
bool wasAcceptingRequests = stop();
|
|
(void)wasAcceptingRequests;
|
|
|
|
// Complete any running kernels
|
|
if (compactIsRunning) { compactKernelComplete(true); }
|
|
if (collateIsRunning) {
|
|
collateKernelComplete(std::nullopt, std::nullopt, true);
|
|
}
|
|
|
|
{
|
|
/** EXPLANATION:
|
|
* Calculate the delay as the maximum of the configured delay and any
|
|
* future delays. The 0 is a placeholder for any delays that will be
|
|
* introduced in the future. When new delays are added, they should be
|
|
* included in the std::max() call (e.g., std::max(
|
|
* OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS, futureDelay1, futureDelay2, 0)).
|
|
*/
|
|
int delayMs = std::max(OCLCOLLMESH_ENGN_FINALIZE_DELAY_MS, 0);
|
|
|
|
auto& ioService = smoHooksPtr->ComponentThread_getSelf()->getIoService();
|
|
AsynchronousBridge bridge(ioService);
|
|
boost::asio::deadline_timer timeoutTimer(ioService);
|
|
|
|
/** EXPLANATION:
|
|
* We wait for delayMs milliseconds to ensure that any in-flight OpenCL
|
|
* kernel operations have definitely finished. OpenCL kernels cannot be
|
|
* cancelled once enqueued, so in-flight kernels may still be executing
|
|
* when finalize() is called. The delay ensures any running kernels
|
|
* complete and their callbacks execute before we destroy resources.
|
|
* This prevents use-after-free errors from resumed async continuations
|
|
* accessing destroyed state.
|
|
*/
|
|
timeoutTimer.expires_from_now(
|
|
boost::posix_time::milliseconds(delayMs));
|
|
timeoutTimer.async_wait(
|
|
[&bridge](const boost::system::error_code& error)
|
|
{
|
|
(void)error;
|
|
|
|
// Always signal complete, whether timeout expired or was cancelled
|
|
bridge.setAsyncOperationComplete();
|
|
});
|
|
|
|
bridge.waitForAsyncOperationCompleteOrIoServiceStopped();
|
|
}
|
|
|
|
// Release OpenCL buffers via smo hooks
|
|
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer)
|
|
{
|
|
if (clAverageIntensityBufferClBuffer)
|
|
{
|
|
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
|
|
clAverageIntensityBufferClBuffer);
|
|
clAverageIntensityBufferClBuffer.reset();
|
|
}
|
|
if (clCollationBufferClBuffer)
|
|
{
|
|
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
|
|
clCollationBufferClBuffer);
|
|
clCollationBufferClBuffer.reset();
|
|
}
|
|
if (clAssemblyBufferClBuffer)
|
|
{
|
|
smoHooksPtr->ComputeManager_releaseUseHostPtrBuffer(
|
|
clAssemblyBufferClBuffer);
|
|
clAssemblyBufferClBuffer.reset();
|
|
}
|
|
}
|
|
|
|
// Reset cached cl_mem handles
|
|
clCollationBuffer = nullptr;
|
|
clAverageIntensityBuffer = nullptr;
|
|
clAssemblyBuffer = nullptr;
|
|
|
|
// Release kernels
|
|
if (slotCompactorKernel)
|
|
{
|
|
clReleaseKernel(slotCompactorKernel);
|
|
slotCompactorKernel = nullptr;
|
|
}
|
|
if (collateKernel)
|
|
{
|
|
clReleaseKernel(collateKernel);
|
|
collateKernel = nullptr;
|
|
}
|
|
|
|
// Release programs
|
|
if (slotCompactorProgram)
|
|
{
|
|
clReleaseProgram(slotCompactorProgram);
|
|
slotCompactorProgram = nullptr;
|
|
}
|
|
if (collateProgram)
|
|
{
|
|
clReleaseProgram(collateProgram);
|
|
collateProgram = nullptr;
|
|
}
|
|
|
|
// Release compute device via smo hooks
|
|
if (smoHooksPtr && smoHooksPtr->ComputeManager_releaseDevice
|
|
&& computeDevice)
|
|
{
|
|
smoHooksPtr->ComputeManager_releaseDevice(computeDevice);
|
|
computeDevice.reset();
|
|
}
|
|
|
|
// Reset state variables
|
|
compactIsRunning = false;
|
|
collateIsRunning = false;
|
|
currentCompactKernelEvent = nullptr;
|
|
currentCollateKernelEvent = nullptr;
|
|
assemblyBufferPtr = nullptr;
|
|
assemblyBufferSize = 0;
|
|
collationBufferPtr = nullptr;
|
|
collationBufferSize = 0;
|
|
averageIntensityBufferPtr = nullptr;
|
|
averageIntensityBufferSize = 0;
|
|
frameAssemblyDesc = nullptr;
|
|
}
|
|
|
|
// Static callback for compact kernel event
|
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::compactKernelEventCallback(
|
|
cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
|
|
{
|
|
OpenClCollatingAndMeshingEngine* engine =
|
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
|
|
|
if (!engine || !engine->compactKernelCb)
|
|
{ return; }
|
|
|
|
// Post to io_service to call callback on the correct thread
|
|
if (engine->parent.device && engine->parent.device->componentThread)
|
|
{
|
|
engine->parent.device->componentThread->getIoService().post(
|
|
std::bind(engine->compactKernelCb, event_command_exec_status));
|
|
}
|
|
}
|
|
|
|
// Static callback for collate kernel event
|
|
void CL_CALLBACK OpenClCollatingAndMeshingEngine::collateKernelEventCallback(
|
|
cl_event /*event*/, cl_int event_command_exec_status, void* user_data)
|
|
{
|
|
OpenClCollatingAndMeshingEngine* engine =
|
|
static_cast<OpenClCollatingAndMeshingEngine*>(user_data);
|
|
|
|
if (!engine || !engine->collateKernelCb)
|
|
{ return; }
|
|
|
|
// Post to io_service to call callback on the correct thread
|
|
if (engine->parent.device && engine->parent.device->componentThread)
|
|
{
|
|
engine->parent.device->componentThread->getIoService().post(
|
|
std::bind(engine->collateKernelCb, event_command_exec_status));
|
|
}
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::startCompactKernel(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded,
|
|
compactKernelCbFn callback)
|
|
{
|
|
// Store the caller's callback
|
|
compactKernelCb = std::move(callback);
|
|
|
|
// Validate buffers callable
|
|
auto validateBuffers = [this, &assemblyBuff]() {
|
|
struct iovec assemblyIov = assemblyBuff.getClEngineIovec();
|
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
|
|| assemblyIov.iov_len != assemblyBufferSize)
|
|
{
|
|
throw std::runtime_error(
|
|
std::string(__func__) + ": buffer mismatch - buffers have "
|
|
"changed");
|
|
}
|
|
};
|
|
|
|
// Setup args callable
|
|
auto setupArgs = [this, &assemblyBuff, nSucceeded]() {
|
|
return setupSlotCompactorsArgs(assemblyBuff, nSucceeded);
|
|
};
|
|
|
|
/** EXPLANAITON:
|
|
* Map assembly buffer as WRITE_INVALIDATE_REGION to inform OpenCL that host
|
|
* (io_uring) has written data, and that the host doesn't care what the
|
|
* prior contents of the device's cache see. The device must invalidate
|
|
* its own view of the HOST_PTR and accept our view.
|
|
*
|
|
* Then immediately unmap to let OpenCL make the changes visible to the GPU
|
|
*/
|
|
if (!mapAssemblyBuffer(CL_MAP_WRITE_INVALIDATE_REGION))
|
|
{
|
|
std::cerr << __func__ << ": failed to map assembly buffer" << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Unmap immediately to sync host writes to GPU
|
|
unmapAssemblyBuffer();
|
|
|
|
bool success = startKernel(
|
|
slotCompactorKernel,
|
|
¤tCompactKernelEvent,
|
|
setupArgs,
|
|
validateBuffers,
|
|
1, // globalWorkSize
|
|
compactKernelEventCallback,
|
|
"slotCompactor",
|
|
compactIsRunning);
|
|
|
|
if (!success) { return false; }
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::startCollateKernel(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
collateKernelCbFn callback)
|
|
{
|
|
// Store the caller's callback
|
|
collateKernelCb = std::move(callback);
|
|
|
|
// Validate buffers callable
|
|
auto validateBuffers = [this]() {
|
|
struct iovec assemblyIov = parent.assemblyBuffer.getClEngineIovec();
|
|
struct iovec collationIov = parent.collationBuffer.getClEngineIovec();
|
|
struct iovec averageIntensityIov = parent.averageIntensityBuffer
|
|
.getClEngineIovec();
|
|
|
|
if (assemblyIov.iov_base != assemblyBufferPtr
|
|
|| assemblyIov.iov_len != assemblyBufferSize
|
|
|| collationIov.iov_base != collationBufferPtr
|
|
|| collationIov.iov_len != collationBufferSize
|
|
|| averageIntensityIov.iov_base != averageIntensityBufferPtr
|
|
|| averageIntensityIov.iov_len != averageIntensityBufferSize)
|
|
{
|
|
throw std::runtime_error(
|
|
std::string(__func__) + ": buffer mismatch - buffers have changed");
|
|
}
|
|
};
|
|
|
|
// Setup args callable
|
|
auto setupArgs = [this, intensityStimFrame, ambienceStimFrame]()
|
|
{
|
|
return setupCollateDgramsArgs(intensityStimFrame, ambienceStimFrame);
|
|
};
|
|
|
|
/** EXPLANATION:
|
|
* It shouldn't be necessary to map the assembly/collation buffers here
|
|
* since we don't need to read/write them on the host CPUs (unless we're
|
|
* intervening to debug; in which case we should map them as CL_MAP_READ).
|
|
*
|
|
* Otherwise, the foreign GPU's view of the data in the assembly buffer
|
|
* is currently up to date; and the collation buffer's state is undefined...
|
|
* and also irrelevant since it's only going to be used for output anyway.
|
|
*/
|
|
|
|
if (!mapAssemblyBuffer(CL_MAP_WRITE_INVALIDATE_REGION))
|
|
{
|
|
std::cerr << __func__ << ": failed to map assembly buffer" << std::endl;
|
|
return false;
|
|
}
|
|
|
|
unmapAssemblyBuffer();
|
|
if (!mapCollationBuffer(CL_MAP_WRITE))
|
|
{
|
|
std::cerr << __func__ << ": failed to map assembly buffer" << std::endl;
|
|
return false;
|
|
}
|
|
|
|
unmapCollationBuffer();
|
|
if (!mapAverageIntensityBuffer(CL_MAP_WRITE))
|
|
{
|
|
std::cerr << __func__ << ": failed to map average intensity buffer"
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
unmapAverageIntensityBuffer();
|
|
|
|
// Map/unmap intensity buffer if it exists
|
|
if (intensityStimFrame.has_value())
|
|
{
|
|
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
|
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
|
|
if (intensityClBuffer)
|
|
{
|
|
void* mappedIntensityBuffer = nullptr;
|
|
if (!mapBuffer(
|
|
intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
|
CL_MAP_WRITE_INVALIDATE_REGION, mappedIntensityBuffer))
|
|
{
|
|
std::cerr << __func__ << ": failed to map intensity buffer"
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
|
}
|
|
}
|
|
|
|
// Calculate global work size (just num slots in the frame)
|
|
size_t globalWorkSize = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
|
|
|
bool success = startKernel(
|
|
collateKernel,
|
|
¤tCollateKernelEvent,
|
|
setupArgs,
|
|
validateBuffers,
|
|
globalWorkSize,
|
|
collateKernelEventCallback,
|
|
"collateDgrams",
|
|
collateIsRunning);
|
|
|
|
if (!success) { return false; }
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernel(
|
|
const char* kernelSource, size_t kernelSourceLen,
|
|
const char* kernelName, cl_program& program, cl_kernel& kernel)
|
|
{
|
|
cl_int err;
|
|
|
|
// Create program from source
|
|
program = clCreateProgramWithSource(
|
|
computeDevice->context, 1, &kernelSource, &kernelSourceLen, &err);
|
|
|
|
if (err != CL_SUCCESS || !program)
|
|
{
|
|
std::cerr << __func__ << ": failed to create " << kernelName
|
|
<< " program: " << err << std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Build program
|
|
err = clBuildProgram(program, 1, &computeDevice->device,
|
|
nullptr, nullptr, nullptr);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to build " << kernelName
|
|
<< " program: " << err << std::endl;
|
|
|
|
// Print build log if available
|
|
size_t logSize = 0;
|
|
clGetProgramBuildInfo(
|
|
program, computeDevice->device, CL_PROGRAM_BUILD_LOG,
|
|
0, nullptr, &logSize);
|
|
|
|
if (logSize > 0)
|
|
{
|
|
std::vector<char> log(logSize);
|
|
clGetProgramBuildInfo(
|
|
program, computeDevice->device, CL_PROGRAM_BUILD_LOG,
|
|
logSize, log.data(), nullptr);
|
|
std::cerr << kernelName << " build log: " << log.data()
|
|
<< std::endl;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Create kernel
|
|
kernel = clCreateKernel(program, kernelName, &err);
|
|
if (err != CL_SUCCESS || !kernel)
|
|
{
|
|
std::cerr << __func__ << ": failed to create " << kernelName
|
|
<< " kernel: " << err << std::endl;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::compileAndPrepareKernels()
|
|
{
|
|
// Compile slotCompactor kernel
|
|
if (!compileAndPrepareKernel(
|
|
slotCompactorKernelStart, slotCompactorKernelNBytes,
|
|
"slotCompactor", slotCompactorProgram, slotCompactorKernel))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// Compile collateDgrams kernel
|
|
if (!compileAndPrepareKernel(
|
|
collateKernelStart, collateKernelNBytes,
|
|
"collate", collateProgram, collateKernel))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::setupSlotCompactorsArgs(
|
|
StagingBuffer& assemblyBuff, uint32_t nSucceeded)
|
|
{
|
|
// Extract parameters for slotCompactor kernel
|
|
uint32_t numSlots = static_cast<uint32_t>(frameAssemblyDesc->numSlots);
|
|
uint32_t slotStride = static_cast<uint32_t>(assemblyBuff.slotStrideNBytes);
|
|
uint32_t slotSize = static_cast<uint32_t>(frameAssemblyDesc->slotSizeBytes);
|
|
uint32_t nSucceededUint = static_cast<uint32_t>(nSucceeded);
|
|
|
|
// Set kernel arguments for slotCompactor
|
|
cl_int err;
|
|
err = clSetKernelArg(
|
|
slotCompactorKernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 0: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(slotCompactorKernel, 1, sizeof(uint32_t), &numSlots);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 1: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(slotCompactorKernel, 2, sizeof(uint32_t), &slotStride);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(slotCompactorKernel, 3, sizeof(uint32_t), &slotSize);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(
|
|
slotCompactorKernel, 4, sizeof(uint32_t), &nSucceededUint);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::setupCollateDgramsArgs(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame)
|
|
{
|
|
// Extract parameters for collateDgrams kernel
|
|
uint32_t slotStride = static_cast<uint32_t>(
|
|
parent.assemblyBuffer.slotStrideNBytes);
|
|
|
|
// Calculate nPointsPerSlot from device return mode
|
|
if (!parent.device)
|
|
{
|
|
std::cerr << __func__ << ": device not available" << std::endl;
|
|
return false;
|
|
}
|
|
int returnMode = static_cast<int>(parent.device->currentReturnMode);
|
|
uint32_t nPointsPerSlot = static_cast<uint32_t>(
|
|
livoxProto1::Device::getNPointsPerDgram(returnMode));
|
|
uint32_t nDgramsPerFrame = static_cast<uint32_t>(
|
|
frameAssemblyDesc->numSlots);
|
|
|
|
// Set kernel arguments for collateDgrams
|
|
cl_int err;
|
|
err = clSetKernelArg(collateKernel, 0, sizeof(cl_mem), &clAssemblyBuffer);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 0: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(collateKernel, 1, sizeof(cl_mem), &clCollationBuffer);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 1: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Set intensity buffer argument (arg 2)
|
|
cl_mem intensityClBuffer = nullptr;
|
|
if (intensityStimFrame.has_value())
|
|
{
|
|
intensityClBuffer = intensityStimFrame->get().clBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
}
|
|
err = clSetKernelArg(collateKernel, 2, sizeof(cl_mem), &intensityClBuffer);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 2: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
// Set average intensity buffer argument (arg 3)
|
|
/** EXPLANATION:
|
|
* We only pass the average intensity buffer argument to the collate kernel
|
|
* when ambienceStimFrame is present. This is because the collate kernel
|
|
* only needs the average intensity buffer if ambience processing is
|
|
* requested (i.e., the ambience stimulus buffer is attached). If no
|
|
* ambienceStimFrame is supplied, we skip passing the buffer to avoid
|
|
* unnecessary work.
|
|
*/
|
|
cl_mem averageIntensityClBuffer = nullptr;
|
|
if (ambienceStimFrame.has_value()) {
|
|
averageIntensityClBuffer = clAverageIntensityBuffer;
|
|
}
|
|
err = clSetKernelArg(
|
|
collateKernel, 3, sizeof(cl_mem), &averageIntensityClBuffer);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 3: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(collateKernel, 4, sizeof(uint32_t), &slotStride);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 4: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(collateKernel, 5, sizeof(uint32_t), &nPointsPerSlot);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 5: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
err = clSetKernelArg(collateKernel, 6, sizeof(uint32_t), &nDgramsPerFrame);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to set kernel arg 6: " << err
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::stop()
|
|
{
|
|
// Acquire and release lock tightly around setting the flag
|
|
SpinLock::Guard lock(shouldAcceptRequestsLock);
|
|
bool wasAcceptingRequests = shouldAcceptRequests;
|
|
shouldAcceptRequests = false;
|
|
return wasAcceptingRequests;
|
|
}
|
|
|
|
void OpenClCollatingAndMeshingEngine::compactKernelComplete(bool isFinalizing)
|
|
{
|
|
cl_map_flags mapFlags;
|
|
|
|
/** EXPLANATION:
|
|
* Technically we should only need to do this if we plan to read the
|
|
* compacted slots for debugging purposes. Otherwise this is unnecessary.
|
|
*/
|
|
|
|
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
|
else { mapFlags = CL_MAP_READ; }
|
|
|
|
if (mapAssemblyBuffer(mapFlags)) {
|
|
unmapAssemblyBuffer();
|
|
}
|
|
clFlush(computeDevice->commandQueue);
|
|
|
|
// Stop only compact kernel
|
|
if (compactIsRunning && currentCompactKernelEvent)
|
|
{
|
|
clWaitForEvents(1, ¤tCompactKernelEvent);
|
|
clReleaseEvent(currentCompactKernelEvent);
|
|
currentCompactKernelEvent = nullptr;
|
|
}
|
|
|
|
clFinish(computeDevice->commandQueue);
|
|
compactKernelCb = [](cl_int){};
|
|
compactIsRunning = false;
|
|
}
|
|
|
|
void OpenClCollatingAndMeshingEngine::collateKernelComplete(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
bool isFinalizing)
|
|
{
|
|
(void)ambienceStimFrame;
|
|
|
|
cl_map_flags mapFlags;
|
|
/** EXPLANATION:
|
|
* Technically we should only need to do this if we plan to read the
|
|
* collated dgrams for debugging purposes. Otherwise this is unnecessary.
|
|
*/
|
|
if (isFinalizing) { mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; }
|
|
else { mapFlags = CL_MAP_READ; }
|
|
|
|
if (mapCollationBuffer(mapFlags)) {
|
|
unmapCollationBuffer();
|
|
}
|
|
|
|
if (mapAverageIntensityBuffer(mapFlags)) {
|
|
unmapAverageIntensityBuffer();
|
|
}
|
|
|
|
// Map/unmap intensity buffer if it exists
|
|
if (intensityStimFrame.has_value())
|
|
{
|
|
StimulusFrame& intensityFrame = intensityStimFrame->get();
|
|
cl_mem intensityClBuffer = intensityFrame.clBuffer
|
|
->getAssociatedBufferHandleForDevice(computeDevice);
|
|
|
|
if (intensityClBuffer)
|
|
{
|
|
void* mappedIntensityBuffer = nullptr;
|
|
if (mapBuffer(
|
|
intensityClBuffer, intensityFrame.slotDesc.nBytes,
|
|
CL_MAP_READ, mappedIntensityBuffer))
|
|
{
|
|
unmapBuffer(intensityClBuffer, mappedIntensityBuffer);
|
|
}
|
|
}
|
|
}
|
|
|
|
clFlush(computeDevice->commandQueue);
|
|
|
|
// Stop only collate kernel
|
|
if (collateIsRunning && currentCollateKernelEvent)
|
|
{
|
|
clWaitForEvents(1, ¤tCollateKernelEvent);
|
|
clReleaseEvent(currentCollateKernelEvent);
|
|
currentCollateKernelEvent = nullptr;
|
|
}
|
|
|
|
clFinish(computeDevice->commandQueue);
|
|
collateKernelCb = [](cl_int){};
|
|
collateIsRunning = false;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::mapBuffer(
|
|
cl_mem buffer, size_t size, cl_map_flags mapFlags, void*& mappedPtr)
|
|
{
|
|
if (!computeDevice->commandQueue || !buffer)
|
|
{
|
|
std::cerr << __func__ << ": engine not set up or invalid buffer"
|
|
<< std::endl;
|
|
|
|
return false;
|
|
}
|
|
|
|
// If already mapped, return early with success.
|
|
if (mappedPtr != nullptr) { return true; }
|
|
|
|
cl_int err;
|
|
mappedPtr = clEnqueueMapBuffer(
|
|
computeDevice->commandQueue, buffer, CL_TRUE, mapFlags,
|
|
0, size, 0, nullptr, nullptr, &err);
|
|
|
|
if (err != CL_SUCCESS || !mappedPtr)
|
|
{
|
|
std::cerr << __func__ << ": failed to map buffer: " << err
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
return true;
|
|
|
|
cleanup:
|
|
mappedPtr = nullptr;
|
|
return false;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::unmapBuffer(
|
|
cl_mem buffer, void*& mappedPtr
|
|
)
|
|
{
|
|
if (mappedPtr == nullptr)
|
|
{
|
|
// Already unmapped
|
|
return true;
|
|
}
|
|
|
|
if (!computeDevice->commandQueue || !buffer)
|
|
{
|
|
std::cerr << __func__ << ": engine not set up or invalid buffer.\n";
|
|
return false;
|
|
}
|
|
|
|
cl_int err;
|
|
cl_event unmapEvent = nullptr;
|
|
err = clEnqueueUnmapMemObject(
|
|
computeDevice->commandQueue, buffer, mappedPtr,
|
|
0, nullptr, &unmapEvent);
|
|
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to unmap buffer: " << err
|
|
<< std::endl;
|
|
goto cleanup;
|
|
}
|
|
|
|
// Wait for unmap to complete and release the event
|
|
err = clWaitForEvents(1, &unmapEvent);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
std::cerr << __func__ << ": failed to wait for unmap event: " << err
|
|
<< std::endl;
|
|
goto cleanup_unmapEvent;
|
|
}
|
|
|
|
clReleaseEvent(unmapEvent);
|
|
mappedPtr = nullptr;
|
|
return true;
|
|
|
|
cleanup_unmapEvent:
|
|
clReleaseEvent(unmapEvent);
|
|
cleanup:
|
|
return false;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::mapAssemblyBuffer(cl_map_flags mapFlags)
|
|
{
|
|
return mapBuffer(
|
|
clAssemblyBuffer, assemblyBufferSize, mapFlags, mappedAssemblyBuffer);
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::unmapAssemblyBuffer()
|
|
{
|
|
unmapBuffer(clAssemblyBuffer, mappedAssemblyBuffer);
|
|
mappedAssemblyBuffer = nullptr;
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::mapCollationBuffer(cl_map_flags mapFlags)
|
|
{
|
|
return mapBuffer(
|
|
clCollationBuffer, collationBufferSize, mapFlags,
|
|
mappedCollationBuffer);
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::unmapCollationBuffer()
|
|
{
|
|
unmapBuffer(clCollationBuffer, mappedCollationBuffer);
|
|
mappedCollationBuffer = nullptr;
|
|
return true;
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::mapAverageIntensityBuffer(
|
|
cl_map_flags mapFlags
|
|
)
|
|
{
|
|
return mapBuffer(
|
|
clAverageIntensityBuffer, averageIntensityBufferSize, mapFlags,
|
|
mappedAverageIntensityBuffer);
|
|
}
|
|
|
|
bool OpenClCollatingAndMeshingEngine::unmapAverageIntensityBuffer()
|
|
{
|
|
unmapBuffer(clAverageIntensityBuffer, mappedAverageIntensityBuffer);
|
|
mappedAverageIntensityBuffer = nullptr;
|
|
return true;
|
|
}
|
|
|
|
void OpenClCollatingAndMeshingEngine::produceAmbienceStimulusFrame(
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
uint32_t nSucceeded)
|
|
{
|
|
if (!ambienceStimFrame.has_value()) { return; }
|
|
|
|
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
|
|
parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
|
|
if (!ambienceBuff) { return; }
|
|
|
|
uint32_t lowVal = ambienceBuff->ambienceIntensityLowVal;
|
|
|
|
// Read average intensity values from averageIntensityBuffer
|
|
float* averageIntensityAverages = reinterpret_cast<float*>(
|
|
averageIntensityBufferPtr);
|
|
|
|
// Count frames whose average intensity is <= lowVal (postrin only)
|
|
uint32_t ambienceCount = 0;
|
|
for (uint32_t i = 0; i < nSucceeded; ++i)
|
|
{
|
|
float avg = averageIntensityAverages[i];
|
|
if (avg <= static_cast<float>(lowVal))
|
|
{
|
|
++ambienceCount;
|
|
}
|
|
}
|
|
|
|
// Write the ambience count to the ambienceStimFrame
|
|
StimulusFrame& ambienceFrame = ambienceStimFrame->get();
|
|
uint32_t* ambienceValue = reinterpret_cast<uint32_t*>(
|
|
ambienceFrame.slotDesc.vaddr);
|
|
ambienceValue[0] = ambienceCount;
|
|
}
|
|
|
|
class OpenClCollatingAndMeshingEngine::CompactCollateAndMeshFrameReq
|
|
: public PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>
|
|
{
|
|
private:
|
|
OpenClCollatingAndMeshingEngine& engine;
|
|
AsynchronousLoop frameAssemblyResult;
|
|
StimulusFrame& stimulusFrame;
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame;
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame;
|
|
|
|
public:
|
|
CompactCollateAndMeshFrameReq(
|
|
OpenClCollatingAndMeshingEngine& engine_,
|
|
AsynchronousLoop& asyncLoop,
|
|
StimulusFrame& stimulusFrame_,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame_,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame_,
|
|
const std::shared_ptr<ComponentThread>& caller,
|
|
Callback<compactCollateAndMeshFrameReqCbFn> cb)
|
|
: PostedAsynchronousContinuation<compactCollateAndMeshFrameReqCbFn>(
|
|
caller, cb),
|
|
engine(engine_),
|
|
frameAssemblyResult(asyncLoop), stimulusFrame(stimulusFrame_),
|
|
intensityStimFrame(intensityStimFrame_),
|
|
ambienceStimFrame(ambienceStimFrame_)
|
|
{}
|
|
|
|
public:
|
|
void callOriginalCallback(bool success)
|
|
{ callOriginalCb(success, std::ref(stimulusFrame)); }
|
|
|
|
public:
|
|
void compactCollateAndMeshFrameReq1_doCompact_posted(
|
|
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
|
{
|
|
SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
|
|
if (!engine.shouldAcceptRequests)
|
|
{
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
// Record compact kernel start time
|
|
engine.compactKernelStartTime = std::chrono::high_resolution_clock::now();
|
|
|
|
bool success = engine.startCompactKernel(
|
|
engine.parent.assemblyBuffer,
|
|
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
|
|
std::bind(
|
|
&CompactCollateAndMeshFrameReq
|
|
::compactCollateAndMeshFrameReq2_compactDone_posted,
|
|
context.get(), context,
|
|
std::placeholders::_1));
|
|
|
|
if (!success)
|
|
{
|
|
engine.compactKernelComplete();
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
}
|
|
|
|
void compactCollateAndMeshFrameReq2_compactDone_posted(
|
|
std::shared_ptr<CompactCollateAndMeshFrameReq> context,
|
|
cl_int compactStatus)
|
|
{
|
|
SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
|
|
if (!engine.shouldAcceptRequests)
|
|
{
|
|
/** EXPLANATION:
|
|
* We intentionally don't call compactKernelComplete() here because
|
|
* if shouldAcceptRequests is false, then the caller that called
|
|
* finalize() will also be forced to call compactKernelComplete()
|
|
* inside of finalize().
|
|
*/
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
engine.compactKernelComplete();
|
|
// Record compact kernel end time
|
|
engine.compactKernelEndTime = std::chrono::high_resolution_clock::now();
|
|
|
|
// If compact failed, call callback directly with failure
|
|
if (compactStatus != CL_SUCCESS)
|
|
{
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
#if 0
|
|
// Print first 4 bytes of each slot
|
|
if (engine.frameAssemblyDesc)
|
|
{
|
|
for (size_t i = 0; i < engine.frameAssemblyDesc->numSlots; ++i) {
|
|
engine.parent.ioUringAssemblyEngine.printSlotBytes(i, 4);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
lock.unlockPrematurely();
|
|
context->compactCollateAndMeshFrameReq3_doCollate_posted(context);
|
|
}
|
|
|
|
void compactCollateAndMeshFrameReq3_doCollate_posted(
|
|
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
|
{
|
|
SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
|
|
if (!engine.shouldAcceptRequests)
|
|
{
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
// Record collate kernel start time
|
|
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
|
|
|
|
bool success = engine.startCollateKernel(
|
|
context->intensityStimFrame, context->ambienceStimFrame,
|
|
std::bind(
|
|
&CompactCollateAndMeshFrameReq
|
|
::compactCollateAndMeshFrameReq4_collateDone_maybePosted,
|
|
context.get(), context,
|
|
std::placeholders::_1));
|
|
|
|
if (!success)
|
|
{
|
|
engine.collateKernelComplete(
|
|
context->intensityStimFrame, context->ambienceStimFrame);
|
|
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
}
|
|
|
|
void compactCollateAndMeshFrameReq4_collateDone_maybePosted(
|
|
[[maybe_unused]] std::shared_ptr<CompactCollateAndMeshFrameReq> context,
|
|
cl_int collateStatus)
|
|
{
|
|
SpinLock::Guard lock(engine.shouldAcceptRequestsLock);
|
|
if (!engine.shouldAcceptRequests)
|
|
{
|
|
/* We intentionally don't call collateKernelComplete() here for the
|
|
* same reason as above.
|
|
*/
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
/** EXPLANATION:
|
|
* The reason we don't call collateKernelComplete before checking
|
|
* shouldAcceptRequests is because if shouldAcceptRequests is false, then
|
|
* we shouldn't touch any of the collate cycle state...or any state within
|
|
* the engine at all, since finalize() may well have been called.
|
|
*
|
|
* Therefore it's finalize()'s responsibility to ensure that it properly
|
|
* completes/cleans up any in-flight operations.
|
|
*/
|
|
engine.collateKernelComplete(
|
|
context->intensityStimFrame, context->ambienceStimFrame);
|
|
|
|
// Record collate kernel end time
|
|
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
|
|
|
bool success = (collateStatus == CL_SUCCESS);
|
|
|
|
// Early callback + return pattern
|
|
if (!success)
|
|
{
|
|
callOriginalCallback(false);
|
|
return;
|
|
}
|
|
|
|
uint32_t nSucceeded = context->frameAssemblyResult.nSucceeded.load();
|
|
|
|
// Produce ambience frame if ambience buffer is attached
|
|
if (context->ambienceStimFrame.has_value())
|
|
{
|
|
engine.produceAmbienceStimulusFrame(
|
|
context->ambienceStimFrame, nSucceeded);
|
|
}
|
|
|
|
int returnMode = static_cast<int>(engine.parent.device->currentReturnMode);
|
|
size_t pointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
|
|
returnMode);
|
|
size_t totalPoints = nSucceeded * pointsPerDgram;
|
|
|
|
// Count points with intensity greater than 116
|
|
size_t highIntensityCount = 0;
|
|
if (context->intensityStimFrame.has_value())
|
|
{
|
|
StimulusFrame& intensityFrame = context->intensityStimFrame->get();
|
|
float* intensityFloats = reinterpret_cast<float*>(intensityFrame.slotDesc.vaddr);
|
|
for (size_t i = 0; i < totalPoints; ++i)
|
|
{
|
|
float intensity = intensityFloats[i];
|
|
if (intensity >= 116.0f)
|
|
{
|
|
++highIntensityCount;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Print all averages above thresholds from average intensity buffer
|
|
if (context->ambienceStimFrame.has_value())
|
|
{
|
|
std::shared_ptr<PcloudAmbienceStimulusBuffer> ambienceBuff =
|
|
engine.parent.ambienceStimulusBuffer.load(std::memory_order_acquire);
|
|
uint32_t lowVal = ambienceBuff->ambienceIntensityLowVal;
|
|
uint32_t postrinThreshold = ambienceBuff->postrinInterestThreshold;
|
|
|
|
float* averageIntensityAverages = reinterpret_cast<float*>(
|
|
engine.averageIntensityBufferPtr);
|
|
// Count frames that met the postrin threshold
|
|
uint32_t framesMetThreshold = 0;
|
|
for (uint32_t i = 0; i < nSucceeded; ++i)
|
|
{
|
|
float avg = averageIntensityAverages[i];
|
|
if (avg <= static_cast<float>(lowVal))
|
|
{
|
|
++framesMetThreshold;
|
|
}
|
|
}
|
|
|
|
// Read the stimFrame value (ambience count)
|
|
StimulusFrame& ambienceFrame = context->ambienceStimFrame->get();
|
|
uint32_t* ambienceValue = reinterpret_cast<uint32_t*>(
|
|
ambienceFrame.slotDesc.vaddr);
|
|
uint32_t stimFrameValue = ambienceValue[0];
|
|
|
|
bool meetsPostrinThreshold = (framesMetThreshold >= postrinThreshold);
|
|
|
|
std::cout << __func__ << ": frames met threshold=" << framesMetThreshold
|
|
<< ", stimFrame value=" << stimFrameValue
|
|
<< ", postrin threshold=" << postrinThreshold
|
|
<< ", meets postrin=" << (meetsPostrinThreshold ? "yes" : "no")
|
|
<< std::endl;
|
|
}
|
|
|
|
std::cout << __func__ << ": intensityRingBufferIndex="
|
|
<< (context->intensityStimFrame.has_value() ?
|
|
context->intensityStimFrame->get().ringBufferIndex : SIZE_MAX)
|
|
<< ", ambienceRingBufferIndex="
|
|
<< (context->ambienceStimFrame.has_value() ?
|
|
context->ambienceStimFrame->get().ringBufferIndex : SIZE_MAX)
|
|
<< ", pointsPerDgram=" << pointsPerDgram
|
|
<< ", nSucceeded=" << nSucceeded
|
|
<< ", totalPoints=" << totalPoints
|
|
<< ", highIntensityCount=" << highIntensityCount << std::endl;
|
|
|
|
callOriginalCallback(success);
|
|
}
|
|
};
|
|
|
|
void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
|
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> intensityStimFrame,
|
|
std::optional<std::reference_wrapper<StimulusFrame>> ambienceStimFrame,
|
|
Callback<compactCollateAndMeshFrameReqCbFn> callback)
|
|
{
|
|
{
|
|
SpinLock::Guard lock(shouldAcceptRequestsLock);
|
|
if (!shouldAcceptRequests)
|
|
{
|
|
callback.callbackFn(false, stimulusFrame);
|
|
return;
|
|
}
|
|
}
|
|
|
|
auto caller = smoHooksPtr->ComponentThread_getSelf();
|
|
auto request = std::make_shared<CompactCollateAndMeshFrameReq>(
|
|
*this, asyncLoop, stimulusFrame, intensityStimFrame, ambienceStimFrame,
|
|
caller,
|
|
std::move(callback));
|
|
|
|
// Check if compaction is needed
|
|
bool needsCompaction = IoUringAssemblyEngine::compactionIsNeeded(
|
|
asyncLoop.nSucceeded.load(), asyncLoop.nTotal);
|
|
|
|
// Start with compaction if needed, then chain to collation
|
|
if (needsCompaction)
|
|
{
|
|
parent.device->componentThread->getIoService().post(
|
|
STC(std::bind(
|
|
&CompactCollateAndMeshFrameReq
|
|
::compactCollateAndMeshFrameReq1_doCompact_posted,
|
|
request.get(), request)));
|
|
}
|
|
else
|
|
{
|
|
// Skip compaction, go straight to collation
|
|
parent.device->componentThread->getIoService().post(
|
|
STC(std::bind(
|
|
&CompactCollateAndMeshFrameReq
|
|
::compactCollateAndMeshFrameReq3_doCollate_posted,
|
|
request.get(), request)));
|
|
}
|
|
}
|
|
|
|
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCompactKernelDuration() const
|
|
{
|
|
auto duration = compactKernelEndTime - compactKernelStartTime;
|
|
if (duration.count() < 0)
|
|
{
|
|
return std::chrono::milliseconds(0);
|
|
}
|
|
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
|
}
|
|
|
|
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCollateKernelDuration() const
|
|
{
|
|
auto duration = collateKernelEndTime - collateKernelStartTime;
|
|
if (duration.count() < 0)
|
|
{
|
|
return std::chrono::milliseconds(0);
|
|
}
|
|
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
|
}
|
|
|
|
} // namespace stim_buff
|
|
} // namespace smo
|