From 0b2fde3484e81cc39986955962db896adf6ad4f9 Mon Sep 17 00:00:00 2001
From: Hayodea Hekol
Date: Thu, 30 Oct 2025 17:53:58 -0400
Subject: [PATCH] livoxGen1:StagingBuffer: simplify buff size/stride calcs

---
Reviewer notes (text between "---" and the first "diff --git" line is not
applied to the tree):

* This copy of the patch had lost its line breaks, its indentation, the
  author's e-mail address and everything that was written in angle
  brackets. The line structure (including blank lines) was rebuilt from the
  hunk line counts. The header names on the #include lines, the template
  arguments (std::unique_ptr<uint8_t[]>, std::atomic<size_t>,
  std::atomic<bool>, std::shared_ptr<FrameAssemblyDesc>,
  std::vector<FrameAssemblyDesc::SlotDesc>) and the indentation width are
  inferred from usage. Verify the context and removed lines against the
  tree before applying.
* computeSlotStrideAndBufferSize() used max(alignment, pad) as the slot
  stride. With pad > alignment and pad not a multiple of the alignment,
  every slot after the first started misaligned. The stride is now the pad
  rounded up to a multiple of the alignment. An alignment of 0 (previously
  a division by zero) and size_t overflow of the stride or of the buffer
  size are now rejected with exceptions. Results are unchanged for the
  io_uring defaults (4096 / 1472).
* The last hunk header and the diffstat were recomputed for the lines added
  by this review. The post-image blob id on the stagingBuffer.h "index"
  line predates these changes.
* Not changed here: StagingBuffer has std::atomic members, which are
  neither copyable nor movable, so the "= default" move operations are
  defined as deleted despite the "Non-copyable, movable" comment.

 stimBuffApis/livoxGen1/stagingBuffer.cpp |  10 +-
 stimBuffApis/livoxGen1/stagingBuffer.h   | 187 +++++++++++++++++++++---
 2 files changed, 170 insertions(+), 27 deletions(-)

diff --git a/stimBuffApis/livoxGen1/stagingBuffer.cpp b/stimBuffApis/livoxGen1/stagingBuffer.cpp
index 394adec..2c19f81 100644
--- a/stimBuffApis/livoxGen1/stagingBuffer.cpp
+++ b/stimBuffApis/livoxGen1/stagingBuffer.cpp
@@ -3,12 +3,12 @@
 namespace smo {
 namespace stim_buff {
 
+// Static defaults for io_uring
 const StagingBuffer::InputEngineConstraints
-StagingBuffer::InputEngineConstraints::ioUringConstraints{
-    16, // nBytesPerPoint
-    28, // udpHeaderOverheadNBytes
-    1500 // mtuNBytes
-};
+StagingBuffer::InputEngineConstraints::ioUringConstraints(
+    4096, // slotStartAlignmentByteVal (page alignment for DMA)
+    1472 // slotPadToNBytes (MTU 1500 - UDP/IP header 28)
+);
 
 } // namespace stim_buff
 } // namespace smo
diff --git a/stimBuffApis/livoxGen1/stagingBuffer.h b/stimBuffApis/livoxGen1/stagingBuffer.h
index 0184583..2d45bd3 100644
--- a/stimBuffApis/livoxGen1/stagingBuffer.h
+++ b/stimBuffApis/livoxGen1/stagingBuffer.h
@@ -5,6 +5,12 @@
 #include <atomic>
 #include <cstdint>
 #include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "FrameAssemblyDesc.h"
 
 namespace smo {
 namespace stim_buff {
@@ -25,15 +31,38 @@ public:
     class InputEngineConstraints
     {
     public:
-        InputEngineConstraints();
-        ~InputEngineConstraints();
+        InputEngineConstraints(
+            size_t slotStartAlignmentByteVal_,
+            size_t slotPadToNBytes_)
+        : slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
+        slotPadToNBytes(slotPadToNBytes_)
+        {}
+
+        ~InputEngineConstraints() = default;
+
+        // Input-engine layout/constraints
+        size_t slotStartAlignmentByteVal; // power-of-2 alignment (e.g., 4096)
+        size_t slotPadToNBytes; // minimum size per datagram slot
+
+        // Static defaults for io_uring
+        static const InputEngineConstraints ioUringConstraints;
+
+        inline std::string stringify() const
+        {
+            std::ostringstream oss;
+            oss << "InputEngineConstraints{"
+                << "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
+                << ", slotPadToNBytes=" << slotPadToNBytes
+                << "}";
+            return oss.str();
+        }
     };
 
     class OutputEngineConstraints
     {
     public:
-        OutputEngineConstraints();
-        ~OutputEngineConstraints();
+        OutputEngineConstraints() = default;
+        ~OutputEngineConstraints() = default;
     };
 
 public:
@@ -45,8 +74,8 @@ public:
     explicit StagingBuffer(
         const InputEngineConstraints& inputEngineConstraints,
         const OutputEngineConstraints& outputEngineConstraints,
-        size_t nPointsPerFrame);
-    ~StagingBuffer();
+        size_t nDgramsPerFrame);
+    ~StagingBuffer() = default;
 
     // Non-copyable, movable
     StagingBuffer(const StagingBuffer&) = delete;
@@ -55,32 +84,146 @@ public:
     StagingBuffer& operator=(StagingBuffer&&) = default;
 
 public:
-    // operator IoUringQueueDescriptor() const;
+    /** EXPLANATION:
+     * Returns an input-engine-agnostic descriptor describing per-frame packet
+     * slot layout. Different input engines should be able to convert this into
+     * engine-specific metadata. E.g: io_uring's SQE descriptor.
+     */
+    operator std::shared_ptr<FrameAssemblyDesc>() const { return frameDesc; }
     // operator OpenClSharedBufferDescriptor() const;
 
-    bool isAssembling() const { return isAssembling_; }
-    void startAssembly();
-    void stopAssembly();
+    bool isAssembling() const { return assemblingFlag.load(); }
+    void startAssembly() { assemblingFlag.store(true); }
+    void stopAssembly() { assemblingFlag.store(false); }
+
+    inline std::string stringify() const
+    {
+        std::ostringstream oss;
+        oss << "StagingBuffer{"
+            << "nDgramsPerFrame=" << nDgramsPerFrame
+            << ", bufferNBytes=" << bufferNBytes
+            << ", slotStrideNBytes=" << slotStrideNBytes
+            << ", constraints=" << inputConstraints.stringify()
+            << "}";
+        return oss.str();
+    }
 
 private:
+    void computeSlotStrideAndBufferSize();
+
     // Buffer data
-    std::unique_ptr<uint8_t[]> buffer_;
-    size_t bufferSize_;
+    std::unique_ptr<uint8_t[]> buffer;
+    size_t bufferNBytes;
+
+    // Layout/invariants
+    size_t nDgramsPerFrame;
+    size_t slotStrideNBytes;
+    InputEngineConstraints inputConstraints;
+
+    // Descriptor (computed once; reused across frames)
+    mutable std::shared_ptr<FrameAssemblyDesc> frameDesc;
 
     // Current state
-    std::atomic<size_t> currentSize_;
-    std::atomic<bool> isAssembling_;
+    std::atomic<size_t> currentNBytes;
+    std::atomic<bool> assemblingFlag;
 };
 
-class IoUringConstraints
-: public StagingBuffer::InputEngineConstraints
+/** Inline implementations
+ ******************************************************************************/
+
+inline StagingBuffer::StagingBuffer(
+    const InputEngineConstraints& inputEngineConstraints_,
+    const OutputEngineConstraints& /*outputEngineConstraints*/,
+    size_t nDgramsPerFrame)
+: buffer(nullptr), bufferNBytes(0),
+nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0),
+inputConstraints(inputEngineConstraints_),
+assemblingFlag(false)
 {
-public:
-    IoUringConstraints()
-    : StagingBuffer::InputEngineConstraints()
-    {}
-    ~IoUringConstraints() = default;
-};
+    if (nDgramsPerFrame == 0)
+    {
+        throw std::invalid_argument(std::string(__func__) +
+            ": StagingBuffer: nDgramsPerFrame must be > 0");
+    }
+
+    computeSlotStrideAndBufferSize();
+
+    // NOTE(review): plain new[] only guarantees the default allocation
+    // alignment, so the base address is not guaranteed to be a multiple of
+    // slotStartAlignmentByteVal; only the slot offsets are. Confirm whether
+    // the input engine needs absolute alignment.
+    buffer.reset(new uint8_t[bufferNBytes]);
+    currentNBytes.store(0);
+
+    // Build FrameAssemblyDesc once
+    std::vector<FrameAssemblyDesc::SlotDesc> slots;
+    slots.reserve(nDgramsPerFrame);
+    uint8_t *frameBase = buffer.get();
+    for (size_t i = 0; i < nDgramsPerFrame; ++i)
+    {
+        size_t off = i * slotStrideNBytes;
+        FrameAssemblyDesc::SlotDesc s{
+            off, frameBase + off, inputConstraints.slotPadToNBytes};
+
+        slots.push_back(s);
+    }
+
+    frameDesc = std::make_shared<FrameAssemblyDesc>(
+        nDgramsPerFrame, inputConstraints.slotPadToNBytes, bufferNBytes,
+        std::move(slots));
+}
+
+/** EXPLANATION:
+ * Derives slotStrideNBytes and bufferNBytes from inputConstraints and
+ * nDgramsPerFrame.
+ *
+ * The stride is slotPadToNBytes rounded up to a whole number of
+ * slotStartAlignmentByteVal units (at least one), so every slot offset
+ * (i * slotStrideNBytes) is a multiple of the alignment. max(alignment, pad)
+ * is not enough: when pad > alignment and pad is not a multiple of it, every
+ * slot after the first would start misaligned.
+ *
+ * Throws std::invalid_argument if the alignment is 0 and std::length_error
+ * if the stride or the total buffer size does not fit in size_t.
+ */
+inline void StagingBuffer::computeSlotStrideAndBufferSize()
+{
+    const size_t alignment = inputConstraints.slotStartAlignmentByteVal;
+    const size_t padTo = inputConstraints.slotPadToNBytes;
+    const size_t sizeMax = static_cast<size_t>(-1);
+
+    if (alignment == 0)
+    {
+        throw std::invalid_argument(std::string(__func__) +
+            ": slotStartAlignmentByteVal must be > 0");
+    }
+
+    // Ceil-divide padTo by alignment (at least one unit). Written without the
+    // usual "+ alignment - 1" so the rounding itself cannot overflow.
+    size_t nAlignUnits = padTo / alignment;
+    if (padTo % alignment != 0 || nAlignUnits == 0)
+    {
+        ++nAlignUnits;
+    }
+    if (nAlignUnits > sizeMax / alignment)
+    {
+        throw std::length_error(std::string(__func__) +
+            ": slot stride overflows size_t");
+    }
+    slotStrideNBytes = nAlignUnits * alignment;
+
+    // The stride is already a multiple of the alignment, so the total needs
+    // no further rounding; only the multiplication has to be guarded.
+    if (nDgramsPerFrame > sizeMax / slotStrideNBytes)
+    {
+        throw std::length_error(std::string(__func__) +
+            ": buffer size overflows size_t");
+    }
+    bufferNBytes = nDgramsPerFrame * slotStrideNBytes;
+}
+
+/** Specific input/output engine constraints
+ ******************************************************************************/
 
 class OpenClConstraints
 : public StagingBuffer::OutputEngineConstraints