#include "stagingBuffer.h"

#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

namespace smo {
namespace stim_buff {

/**
 * Return the value reported by sysconf(_SC_PAGE_SIZE) as a size_t.
 *
 * Used for the static default constraint sets below so the page-size query
 * is spelled in exactly one place.
 */
static size_t systemPageSizeNBytes()
{
    return static_cast<size_t>(sysconf(_SC_PAGE_SIZE));
}

// Static defaults for io_uring
const StagingBuffer::IOEngineConstraints
StagingBuffer::IOEngineConstraints::ioUringConstraints(
    // slotStartAlignmentByteVal (page alignment for DMA)
    systemPageSizeNBytes(),
    // slotPadToNBytes (MTU 1500 - UDP/IP header 28)
    1472,
    // frameStartAlignmentByteVal (page alignment for DMA)
    systemPageSizeNBytes(),
    // framePadToNBytes (one page)
    systemPageSizeNBytes());

// Static defaults for OpenCL input
const StagingBuffer::IOEngineConstraints
StagingBuffer::IOEngineConstraints::openClInputConstraints(
    // slotStartAlignmentByteVal (page alignment)
    systemPageSizeNBytes(),
    // slotPadToNBytes (XYZI point size)
    16,
    // frameStartAlignmentByteVal (page alignment)
    systemPageSizeNBytes(),
    // framePadToNBytes (one page)
    systemPageSizeNBytes());

/**
 * Greatest common divisor of two unsigned values (Euclid's algorithm).
 *
 * Hand-rolled rather than std::gcd so this file does not require C++17.
 */
static size_t greatestCommonDivisor(size_t a, size_t b)
{
    while (b != 0) {
        const size_t remainder = a % b;
        a = b;
        b = remainder;
    }
    return a;
}

/**
 * Round @p value up to the next multiple of @p multiple.
 *
 * @param value    Value to round up.
 * @param multiple Granularity; must be non-zero (callers validate this).
 * @return The smallest multiple of @p multiple that is >= @p value.
 */
static size_t roundUpToMultiple(size_t value, size_t multiple)
{
    return ((value + multiple - 1) / multiple) * multiple;
}

/**
 * Compute the alignment the first slot must have so that it satisfies both
 * the frame start alignment and the slot start alignment.
 *
 * This is the least common multiple of the two values. When one alignment
 * divides the other (the usual power-of-two case) the result is simply the
 * larger of the two.
 *
 * @param frameStartAlignmentByteVal Required alignment of the frame start.
 * @param slotStartAlignmentByteVal  Required alignment of each slot start.
 * @return The least common multiple of the two alignments.
 * @throws std::invalid_argument if either alignment is zero (a zero
 *         alignment would otherwise cause a division by zero).
 */
static size_t calculateMaxAlignment(
    size_t frameStartAlignmentByteVal,
    size_t slotStartAlignmentByteVal)
{
    if (frameStartAlignmentByteVal == 0 || slotStartAlignmentByteVal == 0) {
        throw std::invalid_argument(std::string(__func__)
            + ": StagingBuffer: frame and slot start alignments must be > 0");
    }

    const size_t divisor = greatestCommonDivisor(
        frameStartAlignmentByteVal, slotStartAlignmentByteVal);

    // Divide before multiplying to keep the intermediate value small.
    return (frameStartAlignmentByteVal / divisor) * slotStartAlignmentByteVal;
}

/**
 * Derive slotStrideNBytes and bufferNBytes from inputConstraints and
 * nDgramsPerFrame.
 *
 * The stride is the slot padding size rounded up to a multiple of the slot
 * start alignment (and never less than one alignment unit), so that every
 * slot, not just the first, starts on an aligned address. The buffer size
 * reserves worst-case leading padding for aligning the first slot, is at
 * least max(framePadToNBytes, slotPadToNBytes), and is rounded up to a
 * multiple of the combined frame/slot alignment.
 *
 * @throws std::invalid_argument if either start alignment is zero.
 */
void StagingBuffer::computeSlotStrideAndBufferSize()
{
    // Alignment needed for the first slot (must satisfy both frame and slot
    // alignment). Called first because it also rejects zero alignments.
    const size_t maxAlignment = calculateMaxAlignment(
        inputConstraints.frameStartAlignmentByteVal,
        inputConstraints.slotStartAlignmentByteVal);

    // A plain max(alignment, pad) would misalign slots 1..N-1 whenever the
    // pad size exceeds the alignment without being a multiple of it.
    slotStrideNBytes = std::max(
        inputConstraints.slotStartAlignmentByteVal,
        roundUpToMultiple(
            inputConstraints.slotPadToNBytes,
            inputConstraints.slotStartAlignmentByteVal));

    // Minimum buffer size
    const size_t minBufferSize = std::max(
        inputConstraints.framePadToNBytes,
        inputConstraints.slotPadToNBytes);

    // Total size needed for nDgramsPerFrame slots
    const size_t slotAreaSize = nDgramsPerFrame * slotStrideNBytes;

    // Padding space at buffer start for the alignment offset
    // (worst case: max alignment - 1)
    const size_t alignmentPadding = maxAlignment - 1;

    // Alignment padding + slot area, then ensure the minimum is met
    const size_t rawSize =
        std::max(alignmentPadding + slotAreaSize, minBufferSize);

    // Align up to the maximum alignment so a valid offset always exists
    bufferNBytes = roundUpToMultiple(rawSize, maxAlignment);
}

/**
 * Compute the byte offset from @p buffer to the first slot and verify that
 * the resulting layout honours the supplied constraints.
 *
 * @param buffer           Start of the backing allocation.
 * @param bufferNBytes     Size of the backing allocation in bytes.
 * @param nDgramsPerFrame  Number of slots that must fit in the buffer.
 * @param slotStrideNBytes Distance in bytes between consecutive slot starts.
 * @param inputConstraints Alignment and padding requirements to validate.
 * @return Offset in bytes from @p buffer to the first slot.
 * @throws std::invalid_argument if either start alignment is zero.
 * @throws std::runtime_error if the first slot is misaligned, the buffer is
 *         smaller than max(framePadToNBytes, slotPadToNBytes), or the slots
 *         do not fit in the buffer after the alignment offset.
 */
size_t StagingBuffer::calculateFirstSlotOffsetAndValidate(
    uint8_t* buffer,
    size_t bufferNBytes,
    size_t nDgramsPerFrame,
    size_t slotStrideNBytes,
    const StagingBuffer::IOEngineConstraints& inputConstraints)
{
    // Alignment needed for the first slot (also rejects zero alignments, so
    // the modulo operations below are safe).
    const size_t maxAlignment = calculateMaxAlignment(
        inputConstraints.frameStartAlignmentByteVal,
        inputConstraints.slotStartAlignmentByteVal);

    // Offset that aligns the first slot to both frame and slot alignment
    const uintptr_t bufferAddr = reinterpret_cast<uintptr_t>(buffer);
    const uintptr_t alignedAddr =
        ((bufferAddr + maxAlignment - 1) / maxAlignment) * maxAlignment;
    const size_t firstSlotOffsetNBytes = alignedAddr - bufferAddr;

    // Validate invariants with exceptions
    uint8_t* firstSlotAddr = buffer + firstSlotOffsetNBytes;
    const uintptr_t firstSlotAddrVal =
        reinterpret_cast<uintptr_t>(firstSlotAddr);

    if (firstSlotAddrVal % inputConstraints.frameStartAlignmentByteVal != 0) {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: first slot address not aligned to "
            + std::to_string(inputConstraints.frameStartAlignmentByteVal));
    }

    if (firstSlotAddrVal % inputConstraints.slotStartAlignmentByteVal != 0) {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: first slot address not aligned to "
            + std::to_string(inputConstraints.slotStartAlignmentByteVal));
    }

    const size_t minBufferSize = std::max(
        inputConstraints.framePadToNBytes,
        inputConstraints.slotPadToNBytes);
    if (bufferNBytes < minBufferSize) {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: buffer size less than minimum required (max of "
            + std::to_string(inputConstraints.framePadToNBytes)
            + " and "
            + std::to_string(inputConstraints.slotPadToNBytes)
            + ")");
    }

    // Same condition as
    //   firstSlotOffsetNBytes + nDgramsPerFrame * slotStrideNBytes > bufferNBytes
    // but written with a division so a huge slot count or stride cannot wrap
    // around and slip past the check.
    const bool offsetBeyondBuffer = firstSlotOffsetNBytes > bufferNBytes;
    const bool slotsDoNotFit =
        !offsetBeyondBuffer
        && slotStrideNBytes != 0
        && nDgramsPerFrame
            > (bufferNBytes - firstSlotOffsetNBytes) / slotStrideNBytes;
    if (offsetBeyondBuffer || slotsDoNotFit) {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: buffer size insufficient to hold "
            + std::to_string(nDgramsPerFrame)
            + " slots with proper alignment and padding");
    }

    return firstSlotOffsetNBytes;
}

/**
 * Allocate, lock and lay out a staging buffer of @p nDgramsPerFrame slots.
 *
 * @param inputEngineConstraints_ Alignment/padding constraints used to size
 *                                and lay out the buffer.
 * @param nDgramsPerFrame         Number of datagram slots per frame; must be
 *                                greater than zero.
 *
 * The second parameter (output engine constraints) is currently unused.
 *
 * @throws std::invalid_argument if @p nDgramsPerFrame is zero or a start
 *         alignment in @p inputEngineConstraints_ is zero.
 * @throws std::runtime_error if mmap() or mlock() fails, or if the layout
 *         validation fails.
 */
StagingBuffer::StagingBuffer(
    const IOEngineConstraints& inputEngineConstraints_,
    const IOEngineConstraints& /*outputEngineConstraints*/,
    size_t nDgramsPerFrame)
    : buffer(nullptr, MmapDeleter(0)),
      bufferNBytes(0),
      nDgramsPerFrame(nDgramsPerFrame),
      slotStrideNBytes(0),
      firstSlotOffsetNBytes(0),
      inputConstraints(inputEngineConstraints_),
      assemblingFlag(false)
{
    if (nDgramsPerFrame == 0) {
        throw std::invalid_argument(std::string(__func__)
            + ": StagingBuffer: nDgramsPerFrame must be > 0");
    }

    computeSlotStrideAndBufferSize();

    /* Allocate buffer using mmap() for io_uring registration
     * MAP_ANONYMOUS | MAP_PRIVATE creates anonymous, non-file-backed memory
     */
    void* mmapped = mmap(
        nullptr, bufferNBytes,
        PROT_READ | PROT_WRITE,
        MAP_ANONYMOUS | MAP_PRIVATE,
        -1, 0);

    if (mmapped == MAP_FAILED) {
        // Capture errno before any other call can overwrite it.
        const int mmapErrno = errno;
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: mmap() failed: "
            + std::strerror(mmapErrno));
    }

    // Hand ownership to the smart pointer immediately so every later throw
    // in this constructor releases the mapping through MmapDeleter.
    buffer = decltype(buffer)(
        static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));

    currentNBytes.store(0);

    // Lock the buffer in memory to prevent swapping
    if (mlock(buffer.get(), bufferNBytes) != 0) {
        const int mlockErrno = errno;
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: mlock() failed: "
            + std::strerror(mlockErrno));
    }

    // Calculate offset and validate invariants (helper function in .cpp)
    firstSlotOffsetNBytes = StagingBuffer::calculateFirstSlotOffsetAndValidate(
        buffer.get(), bufferNBytes, nDgramsPerFrame, slotStrideNBytes,
        inputConstraints);

    // Build FrameAssemblyDesc once
    std::vector<FrameAssemblyDesc::SlotDesc> slots;
    slots.reserve(nDgramsPerFrame);
    uint8_t* frameBase = buffer.get() + firstSlotOffsetNBytes;
    for (size_t i = 0; i < nDgramsPerFrame; ++i) {
        const size_t off = i * slotStrideNBytes;
        FrameAssemblyDesc::SlotDesc s{
            off, frameBase + off, inputConstraints.slotPadToNBytes};
        slots.push_back(s);
    }

    frameDesc = std::make_shared<FrameAssemblyDesc>(
        nDgramsPerFrame, inputConstraints.slotPadToNBytes,
        bufferNBytes, std::move(slots));
}

} // namespace stim_buff
} // namespace smo