/* NOTE(review): in this view of the file the header names of the bare
 * #include directives below, and the template arguments of several casts and
 * smart-pointer types further down (e.g. `static_cast(...)`,
 * `std::shared_ptr &`, `std::make_shared(`), are missing -- presumably lost
 * in extraction. Confirm against the original file before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "livoxGen1.h"
#include "pcloudStimulusProducer.h"

/* Compile-time switch for the ambience debug logging in this file.
 * Defaults to 0 (disabled) unless the build defines it.
 */
#ifndef SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN
#define SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN 0
#endif

namespace smo {
namespace stim_buff {

#if SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN
namespace {

/* Debug-only helper: maps a comparator op to the character printed in the
 * ambience debug log ('>' for OP_CMP_GT, '<' for OP_CMP_LT). Any other value
 * yields '?'.
 */
char ambienceComparatorOpChar(ParamComparatorOp op)
{
	switch (op) {
	case OP_CMP_GT: return '>';
	case OP_CMP_LT: return '<';
	}
	return '?';
}

} // namespace
#endif

/* Hook table defined in another translation unit. It is dereferenced
 * throughout this file without a null check.
 */
extern const SmoCallbacks* smoHooksPtr;

/* Constraints for point data consumed by the OpenCL kernels that collate and
 * produce our StimFrames.
 *
 * NOTE(review): this file-level object does not appear to be referenced in
 * the visible code; the constructor uses the class-qualified
 * StagingBuffer::IOEngineConstraints::openClInputConstraints instead.
 * Confirm whether this definition is still needed.
 */
static StagingBuffer::IOEngineConstraints openClInputConstraints(
	/**	FIXME:
	 * This should eventually be aligned to 4B and padded to 12B.
	 *
	 * NOTE(review): the values passed below are already sizeof(float) and
	 * sizeof(float) * 3; confirm whether this FIXME is stale.
	 */
	// slotStartAlignmentByteVal (alignment of one float, not a page)
	sizeof(float),
	// slotPadToNBytes (XYZ = 3 floats per point)
	sizeof(float) * 3,
	// frameStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// framePadToNBytes (page size)
	static_cast(sysconf(_SC_PAGE_SIZE)));

/* Constraints handed to the mesh stimulus buffer created in
 * getOrCreateAttachedStimulusBuffer().
 */
static StagingBuffer::IOEngineConstraints openClMeshInputConstraints(
	// slotStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// slotPadToNBytes: placeholder only. getOrCreateAttachedStimulusBuffer()
	// overwrites it with the per-frame stride before the buffer is created.
	sizeof(float) * 3,
	// frameStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// framePadToNBytes (page size)
	static_cast(sysconf(_SC_PAGE_SIZE)));

/* Constraints handed to the intensity stimulus buffer created in
 * getOrCreateAttachedStimulusBuffer().
 */
static StagingBuffer::IOEngineConstraints openClIntensityInputConstraints(
	// slotStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// slotPadToNBytes: placeholder only. getOrCreateAttachedStimulusBuffer()
	// overwrites it with the per-frame stride before the buffer is created.
	sizeof(float),
	// frameStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// framePadToNBytes (page size)
	static_cast(sysconf(_SC_PAGE_SIZE)));

/* IOEngineConstraints for Pcloud[Light|Dark]AmbienceStimulusBuffer's
 * StagingBuffer, which backs SpMcRingBuffer (one StimulusFrame per ring
 * slot -- a single uint32 passband count). slotPadToNBytes is sized in
 * ctor to sizeof(uint32_t).
 */
static StagingBuffer::IOEngineConstraints openClAmbienceInputConstraints(
	// slotStartAlignmentByteVal (alignment of one uint32_t)
	sizeof(uint32_t),
	// slotPadToNBytes (one uint32_t per slot)
	sizeof(uint32_t),
	// frameStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// framePadToNBytes (page size)
	static_cast(sysconf(_SC_PAGE_SIZE)));

/* IOEngineConstraints for OClCollMeshEngn's per-slot averageIntensityBuffer
 * staging area. Holds nDgramsPerStagingBufferFrame floats; attached ambience
 * stimbuffs read from it to compute passband counts after collate.
 */
static StagingBuffer::IOEngineConstraints openClAverageIntensityConstraints(
	// slotStartAlignmentByteVal (alignment of one float)
	sizeof(float),
	// slotPadToNBytes (one float per slot)
	sizeof(float),
	// frameStartAlignmentByteVal (page alignment)
	static_cast(sysconf(_SC_PAGE_SIZE)),
	// framePadToNBytes (page size)
	static_cast(sysconf(_SC_PAGE_SIZE)));

/* Constructs the producer: initialises the base StimulusProducer with the
 * device's component-thread io service, builds the assembly, collation and
 * average-intensity staging buffers (each sized by
 * nDgramsPerStagingBufferFrame), creates mlock pinners for the collation and
 * average-intensity buffers, and sets up tempStimulusFrame over
 * tempStimulusFrameMem.
 *
 * The body only logs: buffer layouts when the verbose option is set, the
 * device's component thread name, and a message when the constructing thread
 * is not the WORLD thread (or the BODY thread when
 * CONFIG_WORLD_USE_BODY_THREAD is defined).
 */
PcloudStimulusProducer::PcloudStimulusProducer(
	const std::shared_ptr &deviceAttachmentSpec,
	std::shared_ptr &device,
	const PcloudFormatDesc& formatDesc,
	size_t nDgramsPerStagingBufferFrame)
: StimulusProducer(
	deviceAttachmentSpec, device->componentThread->getIoService()),
nDgramsPerStagingBufferFrame(nDgramsPerStagingBufferFrame),
device(device), formatDesc(formatDesc),
openClCollatingAndMeshingEngine(*this),
assemblyBuffer(
	StagingBuffer::IOEngineConstraints::ioUringConstraints,
	StagingBuffer::IOEngineConstraints::openClInputConstraints,
	nDgramsPerStagingBufferFrame),
ioUringAssemblyEngine(*this, nDgramsPerStagingBufferFrame),
collationBuffer(
	StagingBuffer::IOEngineConstraints::openClInputConstraints,
	StagingBuffer::IOEngineConstraints::openClInputConstraints,
	nDgramsPerStagingBufferFrame),
collationBufferMlockPinner(collationBuffer.makeMlockPinner()),
averageIntensityBuffer(
	openClAverageIntensityConstraints,
	openClAverageIntensityConstraints,
	nDgramsPerStagingBufferFrame),
averageIntensityBufferMlockPinner(averageIntensityBuffer.makeMlockPinner()),
pcloudFrameDumper(deviceAttachmentSpec),
tempStimulusFrameMem(0),
tempStimulusFrame(
	FrameAssemblyDesc::SlotDesc{
		0, reinterpret_cast(&tempStimulusFrameMem),
		sizeof(tempStimulusFrameMem)},
	*smoHooksPtr, 0, SIZE_MAX)
{
	if (smoHooksPtr->OptionParser_getOptions().verbose)
	{
		std::cout << __func__ << ": assembly buffer : "
			<< assemblyBuffer.stringify()
			<< "\n\tcollation buffer : "
			<< collationBuffer.stringify() << "\n";
	}

	std::cout << __func__ << ": Device's component thread is "
		<< device->componentThread->name << std::endl;

	// Exactly one of the two `if` heads below survives preprocessing; both
	// guard the same braced block.
#ifndef CONFIG_WORLD_USE_BODY_THREAD
	if (smoHooksPtr->ComponentThread_getSelf()->id != SmoThreadId::WORLD)
#else
	if (smoHooksPtr->ComponentThread_getSelf()->id != SmoThreadId::BODY)
#endif
	{
		std::string errMsg = std::string(__func__)
			+ ": PcloudStimulusProducer constructor called on non-world/body thread "
			+ smoHooksPtr->ComponentThread_getSelf()->name;
		std::cout << errMsg << std::endl;
		// The throw is disabled, so construction on the wrong thread is
		// reported but not rejected.
		// throw std::runtime_error(errMsg);
	}
}

/* Returns true when qualeIfaceApi is one of the four API names this producer
 * handles: "mesh", "pcloudIntensity", "pcloudLightAmbience",
 * "pcloudDarkAmbience".
 */
bool PcloudStimulusProducer::supportsQualeIfaceApi(
	const std::string& qualeIfaceApi)
{
	return qualeIfaceApi == "mesh"
		|| qualeIfaceApi == "pcloudIntensity"
		|| qualeIfaceApi == "pcloudLightAmbience"
		|| qualeIfaceApi == "pcloudDarkAmbience";
}

/* Instance-level counterpart; forwards to supportsQualeIfaceApi(). */
bool PcloudStimulusProducer::exportsQualeIfaceApi(
	const std::string& qualeIfaceApi) const
{
	return supportsQualeIfaceApi(qualeIfaceApi);
}

/* Brings the producer up: prepares the frame dumper, sets up the io_uring
 * assembly engine, then the OpenCL collating/meshing engine, and finally
 * calls StimulusProducer::start().
 *
 * If either engine's setup() returns false the function logs and returns
 * without calling the base class start(); when the OpenCL engine fails, the
 * already set-up io_uring engine is finalized first.
 *
 * NOTE(review): the two failure messages stream __func__ directly followed by
 * "Failed to setup() ", with no ": " separator as the other log lines in this
 * file have. Confirm whether that is intended.
 */
void PcloudStimulusProducer::start()
{
	std::cout << __func__ << ": Starting PcloudStimulusProducer for device "
		<< device->discoveredDevice.deviceIdentifier << std::endl;

	pcloudFrameDumper.prepareForRun();

	// Set up the ioUringAssemblyEngine before the OpenCL engine
	if (!ioUringAssemblyEngine.setup())
	{
		std::cout <<__func__ <<"Failed to setup() "
			<<"IOUringAssemblyEngine.\n";
		return;
	}

	if (!openClCollatingAndMeshingEngine.setup())
	{
		// Undo the io_uring setup that succeeded above.
		ioUringAssemblyEngine.finalize();
		std::cout <<__func__ <<"Failed to setup() "
			<<"OClCollMeshEngine.\n";
		return;
	}

	// Call base class start() as the final step
	StimulusProducer::start();
}

/* Shuts the producer down: calls StimulusProducer::stop() and then finalizes
 * both engines, in the reverse of the order start() sets them up.
 */
void PcloudStimulusProducer::stop()
{
	std::cout << __func__ << ": Stopping PcloudStimulusProducer for device "
		<< device->discoveredDevice.deviceIdentifier << std::endl;

	// Call base class stop() before tearing down the engines
	StimulusProducer::stop();
	// Finalize the OpenCL engine first, then the ioUringAssemblyEngine
	openClCollatingAndMeshingEngine.finalize();
	ioUringAssemblyEngine.finalize();
}

/* Empty free function; it has no callers in the visible code. */
void produceStimFrameAck(void)
{
}

/* Helper function to parse histbuffMs from device attachment spec.
 *
 * Looks the value up in spec->qualeIfaceApiParams under any of the synonyms
 * listed below. 30000 is passed as the last argument -- presumably the value
 * used when none of the names is present; verify against
 * parseOptionalParamAsIntWithSynonyms().
 */
static int parseHistbuffMs(
	const std::shared_ptr& spec)
{
	const std::vector histbuffParamNames = {
		"history-buffer-duration-ms", "hist-buff-duration-ms",
		"histbuff-duration-ms", "histbuff-ms"
	};
	return device::DeviceAttachmentSpec::parseOptionalParamAsIntWithSynonyms(
		spec->qualeIfaceApiParams, histbuffParamNames, 30000);
}

/* Looks up the buffer attached for `spec` via the base class and returns it
 * only when its dynamic type passes the dynamic_pointer_cast check made for
 * spec->qualeIfaceApi. Returns nullptr when the base class finds nothing,
 * when the cast fails, or when qualeIfaceApi is none of the four handled
 * names.
 */
std::shared_ptr PcloudStimulusProducer::getAttachedStimulusBuffer(
	const std::shared_ptr& spec) const
{
	// Call base class implementation
	auto buffer = StimulusProducer::getAttachedStimulusBuffer(spec);
	if (!buffer) { return nullptr; }

	// Optionally validate/upcast the buffer type matches expected type
	// based on qualeIfaceApi (for type safety)
	const std::string& qualeIfaceApi = spec->qualeIfaceApi;
	if (qualeIfaceApi == "mesh")
	{
		if (std::dynamic_pointer_cast(buffer)) {
			return buffer;
		}
	}
	else if (qualeIfaceApi == "pcloudIntensity")
	{
		if (std::dynamic_pointer_cast(buffer)) {
			return buffer;
		}
	}
	else if (qualeIfaceApi == "pcloudLightAmbience")
	{
		if (std::dynamic_pointer_cast(buffer)) {
			return buffer;
		}
	}
	else if (qualeIfaceApi == "pcloudDarkAmbience")
	{
		if (std::dynamic_pointer_cast(buffer)) {
			return buffer;
		}
	}

	// Type mismatch - return nullptr
	return nullptr;
}

/* Detaches `buffer`: stops the producer, nulls whichever of the four
 * specialised buffer pointers currently holds `buffer`, removes it through
 * the base class, then starts the producer again. A null `buffer` is a no-op.
 *
 * NOTE(review): stop()/start() are called unconditionally, so the producer
 * ends up started even if it was not running when this was called. Confirm
 * that callers expect this.
 */
void PcloudStimulusProducer::destroyAttachedStimulusBuffer(
	const std::shared_ptr& buffer)
{
	if (!buffer) { return; }

	this->stop();

	// Clear specialized buffer pointers if they match. In each case the
	// local copy is dropped first and then the member is set to nullptr.
	auto meshBuff = meshStimulusBuffer.load(std::memory_order_acquire);
	if (meshBuff == buffer)
	{
		meshBuff.reset();
		meshStimulusBuffer.store(nullptr, std::memory_order_release);
	}

	auto intensityBuff = intensityStimulusBuffer.load(std::memory_order_acquire);
	if (intensityBuff == buffer)
	{
		intensityBuff.reset();
		intensityStimulusBuffer.store(nullptr, std::memory_order_release);
	}

	auto lightAmbienceBuff = lightAmbienceStimulusBuffer.load(
		std::memory_order_acquire);
	if (lightAmbienceBuff == buffer)
	{
		lightAmbienceBuff.reset();
		lightAmbienceStimulusBuffer.store(nullptr, std::memory_order_release);
	}

	auto darkAmbienceBuff = darkAmbienceStimulusBuffer.load(
		std::memory_order_acquire);
	if (darkAmbienceBuff == buffer)
	{
		darkAmbienceBuff.reset();
		darkAmbienceStimulusBuffer.store(nullptr, std::memory_order_release);
	}

	// Call base class implementation to remove from attachedStimulusBuffers
	StimulusProducer::destroyAttachedStimulusBuffer(buffer);

	this->start();
}

/* Returns the buffer already attached for deviceAttachmentSpec, or creates,
 * registers and returns a new one chosen by
 * deviceAttachmentSpec->qualeIfaceApi ("mesh", "pcloudIntensity",
 * "pcloudLightAmbience", "pcloudDarkAmbience").
 *
 * For every newly created buffer the producer is stopped, the buffer is
 * added through addAttachedStimulusBufferIfNotExists() and published in the
 * matching member pointer, and the producer is started again.
 *
 * Throws std::runtime_error for any other qualeIfaceApi value.
 *
 * NOTE(review): the "mesh" and "pcloudIntensity" branches write
 * slotPadToNBytes on the file-level constraint objects, which every
 * PcloudStimulusProducer in this translation unit shares. Whether the buffer
 * constructors copy the constraints or keep a reference is not visible here
 * -- confirm this is safe with more than one producer.
 */
std::shared_ptr PcloudStimulusProducer::getOrCreateAttachedStimulusBuffer(
	const std::shared_ptr& deviceAttachmentSpec
	)
{
	// Check if buffer already exists (idempotent)
	auto existingBuffer = getAttachedStimulusBuffer(deviceAttachmentSpec);
	if (existingBuffer) { return existingBuffer; }

	// Parse histbuffMs from device attachment spec
	int histbuffMs = parseHistbuffMs(deviceAttachmentSpec);

	// Parse qualeIfaceApi to determine buffer type
	const std::string& qualeIfaceApi = deviceAttachmentSpec->qualeIfaceApi;

	if (qualeIfaceApi == "mesh")
	{
		size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
			static_cast(device->currentReturnMode));

		/* Calculate slotStrideNBytes:
		 * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 3
		 */
		size_t slotStrideNBytes = this->nDgramsPerStagingBufferFrame
			* nPointsPerDgram * sizeof(float) * 3;

		// Reuse openClMeshInputConstraints, only modify slotPadToNBytes
		openClMeshInputConstraints.slotPadToNBytes = slotStrideNBytes;

		auto meshBuffer = std::make_shared(
			*this, deviceAttachmentSpec, histbuffMs,
			openClMeshInputConstraints, openClMeshInputConstraints,
			*smoHooksPtr, CL_MEM_READ_WRITE);

		this->stop();
		addAttachedStimulusBufferIfNotExists(meshBuffer);
		meshStimulusBuffer.store(meshBuffer, std::memory_order_release);
		this->start();
		return meshBuffer;
	}
	else if (qualeIfaceApi == "pcloudIntensity")
	{
		size_t nPointsPerDgram = livoxProto1::Device::getNPointsPerDgram(
			static_cast(device->currentReturnMode));

		/* Calculate slotStrideNBytes:
		 * nDgramsPerStagingBufferFrame * nPointsPerDgram * sizeof(float) * 1
		 */
		size_t slotStrideNBytes = this->nDgramsPerStagingBufferFrame
			* nPointsPerDgram * sizeof(float) * 1;

		// Reuse openClIntensityInputConstraints, only modify slotPadToNBytes
		openClIntensityInputConstraints.slotPadToNBytes = slotStrideNBytes;

		auto intensityBuffer = std::make_shared(
			*this, deviceAttachmentSpec, histbuffMs,
			openClIntensityInputConstraints, openClIntensityInputConstraints,
			*smoHooksPtr, CL_MEM_READ_WRITE);

		this->stop();
		addAttachedStimulusBufferIfNotExists(intensityBuffer);
		intensityStimulusBuffer.store(
			intensityBuffer, std::memory_order_release);
		this->start();
		return intensityBuffer;
	}
	else if (qualeIfaceApi == "pcloudLightAmbience")
	{
		// The ambience branches use the ambience constraints unmodified and
		// additionally pass nDgramsPerStagingBufferFrame to the buffer.
		auto lightAmbienceStimBuff = std::make_shared(
			*this, deviceAttachmentSpec, histbuffMs,
			openClAmbienceInputConstraints, openClAmbienceInputConstraints,
			*smoHooksPtr, CL_MEM_READ_WRITE,
			this->nDgramsPerStagingBufferFrame);

		this->stop();
		addAttachedStimulusBufferIfNotExists(lightAmbienceStimBuff);
		lightAmbienceStimulusBuffer.store(
			lightAmbienceStimBuff, std::memory_order_release);
		this->start();
		return lightAmbienceStimBuff;
	}
	else if (qualeIfaceApi == "pcloudDarkAmbience")
	{
		auto darkAmbienceStimBuff = std::make_shared(
			*this, deviceAttachmentSpec, histbuffMs,
			openClAmbienceInputConstraints, openClAmbienceInputConstraints,
			*smoHooksPtr, CL_MEM_READ_WRITE,
			this->nDgramsPerStagingBufferFrame);

		this->stop();
		addAttachedStimulusBufferIfNotExists(darkAmbienceStimBuff);
		darkAmbienceStimulusBuffer.store(
			darkAmbienceStimBuff, std::memory_order_release);
		this->start();
		return darkAmbienceStimBuff;
	}
	else
	{
		throw std::runtime_error(
			"Unsupported qualeIfaceApi: '" + qualeIfaceApi + "' for "
			"PcloudStimulusProducer. "
			"Supported values: mesh, pcloudIntensity, "
			"pcloudLightAmbience, pcloudDarkAmbience");
	}
}

/* Timeslice indication: kicks off production of one frame, passing an empty
 * ({nullptr, nullptr}) completion callback.
 */
void PcloudStimulusProducer::stimFrameProductionTimesliceInd()
{
	produceFrameReq({nullptr, nullptr});
}

/* Continuation object carrying the state of one frame-production request
 * through three steps:
 *   1. produceFrameReq1_doAssemble_posted  -- asks the io_uring engine to
 *      assemble a frame;
 *   2. produceFrameReq2_assembleDone       -- write-locks the target frames
 *      of the attached intensity/ambience buffers and asks the OpenCL engine
 *      to compact, collate and mesh;
 *   3. produceFrameReq3_compactCollateDone -- releases those frame locks,
 *      optionally dumps the frame, and completes the request.
 * Every exit path ends in callOriginalCallback().
 */
class PcloudStimulusProducer::ProduceFrameReq
: public sscl::cps::PostedAsynchronousContinuation
{
private:
	PcloudStimulusProducer& pcloudProducer;
	// Result of step 1; stored in step 2 and handed to the frame dumper in
	// step 3.
	sscl::AsynchronousLoop frameAssemblyResult;
	// Bound to the producer's tempStimulusFrame in the constructor.
	StimulusFrame& stimulusFrame;
	// Frames write-locked in step 2 and released in step 3; empty when the
	// corresponding buffer is not attached.
	std::optional> intensityStimFrame;
	std::optional> lightAmbienceStimFrame;
	std::optional> darkAmbienceStimFrame;

public:
	ProduceFrameReq(
		PcloudStimulusProducer& producer,
		const std::shared_ptr& caller,
		sscl::cps::Callback cb)
	: sscl::cps::PostedAsynchronousContinuation(caller, cb),
	pcloudProducer(producer),
	frameAssemblyResult(0),
	stimulusFrame(producer.tempStimulusFrame)
	{}

public:
	/* Completes the request: first lets the producer accept the next
	 * stimulus frame, then invokes the caller's callback.
	 */
	void callOriginalCallback()
	{
		pcloudProducer.allowNextStimulusFrame();
		callOriginalCb();
	}

public:
	/* Step 1. Under shouldContinueLock: completes immediately if the
	 * producer should not continue, otherwise issues the assemble request
	 * with step 2 as its completion handler. `context` is bound into the
	 * handler, so the handler holds a shared_ptr to this request.
	 */
	void produceFrameReq1_doAssemble_posted(
		std::shared_ptr context
		)
	{
		sscl::SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
		if (!pcloudProducer.shouldContinue)
		{
			callOriginalCallback();
			return;
		}

		pcloudProducer.ioUringAssemblyEngine.assembleFrameReq(
			{context, std::bind(
				&ProduceFrameReq::produceFrameReq2_assembleDone,
				context.get(), context,
				std::placeholders::_1, std::placeholders::_2)});
	}

	/* Step 2. Runs with shouldContinueLock held for the whole function (the
	 * guard is never released early here). Completes immediately when the
	 * producer should not continue or when assembly failed; otherwise
	 * records the assembly result, write-locks the next ring-buffer frame of
	 * each attached intensity/ambience buffer, and issues the
	 * compact/collate/mesh request with step 3 as its completion handler.
	 */
	void produceFrameReq2_assembleDone(
		std::shared_ptr context,
		bool success, sscl::AsynchronousLoop loop
		)
	{
		sscl::SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
		if (!pcloudProducer.shouldContinue)
		{
			callOriginalCallback();
			return;
		}

		if (!success)
		{
			callOriginalCallback();
			// The failure is only reported when at least one stimulus
			// buffer is attached.
			if (pcloudProducer.attachedStimulusBuffers.size() > 0) {
				std::cerr << __func__ << ": Failed to assemble frame.\n";
			}
			return;
		}

		context->frameAssemblyResult = loop;

		// Check if intensity buffer is attached and acquire frame if so
		if (auto intensityBuff = pcloudProducer.intensityStimulusBuffer.load(
			std::memory_order_acquire))
		{
			size_t intensityRingbuffIndex = intensityBuff
				->ringBuffer.getIndexToProduceInto();
			StimulusFrame& intensityStimFrame = intensityBuff
				->ringBuffer.getDataAtSlot(
					intensityRingbuffIndex);
			// Released in produceFrameReq3_compactCollateDone().
			intensityStimFrame.lock.writeAcquire();
			context->intensityStimFrame = std::make_optional(
				std::ref(intensityStimFrame));
		}
		else {
			context->intensityStimFrame = std::nullopt;
		}

		// Check if light ambience buffer is attached and acquire frame if so.
		// The light buffer is paired with its greater-than comparator.
		std::optional lightAmbienceProductionDescDesc;
		if (auto lightAmbienceBuff =
			pcloudProducer.lightAmbienceStimulusBuffer.load(
				std::memory_order_acquire))
		{
			size_t lightAmbienceRingbuffIndex = lightAmbienceBuff
				->ringBuffer.getIndexToProduceInto();
			StimulusFrame& lightAmbienceStimFrame = lightAmbienceBuff
				->ringBuffer.getDataAtSlot(lightAmbienceRingbuffIndex);
			// Released in produceFrameReq3_compactCollateDone().
			lightAmbienceStimFrame.lock.writeAcquire();
			context->lightAmbienceStimFrame = std::make_optional(
				std::ref(lightAmbienceStimFrame));
			lightAmbienceProductionDescDesc = AmbienceProductionDesc{
				std::ref(lightAmbienceStimFrame),
				lightAmbienceBuff->passbandCountGtComparator};
		}
		else {
			context->lightAmbienceStimFrame = std::nullopt;
		}

		// Check if dark ambience buffer is attached and acquire frame if so.
		// The dark buffer is paired with its less-than comparator.
		std::optional darkAmbienceProductionDescDesc;
		if (auto darkAmbienceBuff =
			pcloudProducer.darkAmbienceStimulusBuffer.load(
				std::memory_order_acquire))
		{
			size_t darkAmbienceRingbuffIndex = darkAmbienceBuff
				->ringBuffer.getIndexToProduceInto();
			StimulusFrame& darkAmbienceStimFrame = darkAmbienceBuff
				->ringBuffer.getDataAtSlot(darkAmbienceRingbuffIndex);
			// Released in produceFrameReq3_compactCollateDone().
			darkAmbienceStimFrame.lock.writeAcquire();
			context->darkAmbienceStimFrame = std::make_optional(
				std::ref(darkAmbienceStimFrame));
			darkAmbienceProductionDescDesc = AmbienceProductionDesc{
				std::ref(darkAmbienceStimFrame),
				darkAmbienceBuff->passbandCountLtComparator};
		}
		else {
			context->darkAmbienceStimFrame = std::nullopt;
		}

		pcloudProducer.openClCollatingAndMeshingEngine.compactCollateAndMeshFrameReq(
			loop, stimulusFrame, context->intensityStimFrame,
			std::move(lightAmbienceProductionDescDesc),
			std::move(darkAmbienceProductionDescDesc),
			{context, std::bind(
				&ProduceFrameReq::produceFrameReq3_compactCollateDone,
				context.get(), context,
				std::placeholders::_1, std::placeholders::_2)});
	}

	/* Step 3. Releases the frame write locks taken in step 2 (before taking
	 * shouldContinueLock), then completes the request. On success the
	 * spinlock is dropped early and, if the frame dumper is enabled, the
	 * produced frame is dumped; exceptions from the dumper are logged and
	 * swallowed. On failure only an error is logged.
	 *
	 * When SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN is non-zero, the passband counts
	 * are read from the ambience frames while they are still write-locked
	 * and logged afterwards on the success path.
	 */
	void produceFrameReq3_compactCollateDone(
		[[maybe_unused]] std::shared_ptr context,
		bool success, StimulusFrame& /*stimulusFrame*/
		)
	{
#if SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN
		// Snapshot the counts now; the frame locks are released just below.
		uint32_t logLightPassbandCount = 0;
		uint32_t logDarkPassbandCount = 0;
		bool logLightAmbience = false;
		bool logDarkAmbience = false;
		if (success)
		{
			if (context->lightAmbienceStimFrame.has_value())
			{
				logLightPassbandCount = *reinterpret_cast(
					context->lightAmbienceStimFrame->get().slotDesc.vaddr);
				logLightAmbience = true;
			}
			if (context->darkAmbienceStimFrame.has_value())
			{
				logDarkPassbandCount = *reinterpret_cast(
					context->darkAmbienceStimFrame->get().slotDesc.vaddr);
				logDarkAmbience = true;
			}
		}
#endif

		// Release intensity frame if it was used
		if (context->intensityStimFrame.has_value()) {
			context->intensityStimFrame->get().lock.writeRelease();
		}

		// Release ambience frames if they were used
		if (context->lightAmbienceStimFrame.has_value()) {
			context->lightAmbienceStimFrame->get().lock.writeRelease();
		}
		if (context->darkAmbienceStimFrame.has_value()) {
			context->darkAmbienceStimFrame->get().lock.writeRelease();
		}

		sscl::SpinLock::Guard lock(pcloudProducer.shouldContinueLock);
		if (!pcloudProducer.shouldContinue)
		{
			callOriginalCallback();
			return;
		}

		if (!success)
		{
			std::cerr << __func__ << ": Failed to compact and collate frame"
				<< std::endl;
		}
		else
		{
			// Drop the spinlock before the dump and logging below.
			lock.unlockPrematurely();

			if (pcloudProducer.pcloudFrameDumper.isEnabled())
			{
				try {
					pcloudProducer.pcloudFrameDumper.dumpProducedFrame(
						*pcloudProducer.device,
						pcloudProducer.collationBuffer,
						context->frameAssemblyResult);
				}
				catch (const std::exception& e)
				{
					// A failed dump is reported but does not fail the
					// request.
					std::cerr << __func__
						<< ": Failed to dump pcloud frame: "
						<< e.what() << std::endl;
				}
			}

#if SMO_DEBUG_PCLOUD_AMBIENCE_INTRIN
			if (logLightAmbience)
			{
				// The buffer is re-loaded here; nothing is logged if it has
				// been detached in the meantime.
				auto lightBuff =
					pcloudProducer.lightAmbienceStimulusBuffer.load(
						std::memory_order_acquire);
				if (lightBuff)
				{
					std::cerr << __func__ << ": pcloudLightAmbience "
						<< "passbandCount=" << logLightPassbandCount
						<< " (per-slot avg intensity "
						<< ambienceComparatorOpChar(
							lightBuff->passbandCountGtComparator.op)
						<< " " << lightBuff->passbandCountGtComparator.value
						<< ")";
					if (lightBuff->negtrinInterestConfig.has_value())
					{
						const auto& nc = *lightBuff->negtrinInterestConfig;
						std::cerr << " negtrinInterestThr=" << nc.threshold;
						if (nc.percentage != 0U) {
							std::cerr << " (from " << nc.percentage << "%)";
						}
						std::cerr << " meetsNegtrinInterest="
							<< (lightBuff->shouldTriggerNegtrinEvent(
								logLightPassbandCount) ? "yes" : "no");
					}
					else {
						std::cerr << " negtrinInterest(n/a)";
					}
					std::cerr << std::endl;
				}
			}
			if (logDarkAmbience)
			{
				auto darkBuff =
					pcloudProducer.darkAmbienceStimulusBuffer.load(
						std::memory_order_acquire);
				if (darkBuff)
				{
					std::cerr << __func__ << ": pcloudDarkAmbience "
						<< "passbandCount=" << logDarkPassbandCount
						<< " (per-slot avg intensity "
						<< ambienceComparatorOpChar(
							darkBuff->passbandCountLtComparator.op)
						<< " " << darkBuff->passbandCountLtComparator.value
						<< ")";
					if (darkBuff->postrinInterestConfig.has_value())
					{
						const auto& pc = *darkBuff->postrinInterestConfig;
						std::cerr << " postrinInterestThr=" << pc.threshold;
						if (pc.percentage != 0U) {
							std::cerr << " (from " << pc.percentage << "%)";
						}
						std::cerr << " meetsPostrinInterest="
							<< (darkBuff->shouldTriggerPostrinEvent(
								logDarkPassbandCount) ? "yes" : "no");
					}
					else {
						std::cerr << " postrinInterest(n/a)";
					}
					std::cerr << std::endl;
				}
			}
#endif

#if 0
			// Print execution durations
			auto assemblyDuration = pcloudProducer.ioUringAssemblyEngine.getAssemblyDuration();
			auto compactDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCompactKernelDuration();
			auto collateDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCollateKernelDuration();

			std::cout << __func__ << ": Successfully compacted and collated frame: assemblyDuration="
				<< assemblyDuration.count() << "ms, compactKernelDuration="
				<< compactDuration.count() << "ms, collateKernelDuration="
				<< collateDuration.count() << "ms" << std::endl;
#endif
		}

		callOriginalCallback();
	}
};

/* Creates a ProduceFrameReq for `callback` and posts its first step to the
 * device's component-thread io service. The request is bound into the posted
 * handler as a shared_ptr.
 */
void PcloudStimulusProducer::produceFrameReq(
	sscl::cps::Callback callback
	)
{
	/**	EXPLANATION:
	 * We shouldn't acquire the StimulusProducer::shouldContinueLock here because
	 * this function is called from
	 * StimulusProducer::stimFrameProductionTimesliceInd(), which is already
	 * holding the lock.
	 */
	auto caller = smoHooksPtr->ComponentThread_getSelf();
	auto request = std::make_shared(
		*this, caller, std::move(callback));

	// Post the doAssemble method to the component thread
	device->componentThread->getIoService().post(
		STC(std::bind(
			&ProduceFrameReq::produceFrameReq1_doAssemble_posted,
			request.get(), request)));
}

} // namespace stim_buff
} // namespace smo