StagingBuff: Enhance IoConstraints with frame constraints

Now StagingBuff instances must meet both frame and slot
constraints.
This commit is contained in:
2025-11-08 00:15:29 -04:00
parent 5f11a9d6c7
commit 7497f2fd95
2 changed files with 168 additions and 23 deletions
+144 -4
View File
@@ -1,5 +1,7 @@
#include "stagingBuffer.h" #include "stagingBuffer.h"
#include <unistd.h> #include <unistd.h>
#include <cstdint>
#include <stdexcept>
namespace smo { namespace smo {
namespace stim_buff { namespace stim_buff {
@@ -7,16 +9,154 @@ namespace stim_buff {
// Static defaults for io_uring // Static defaults for io_uring
const StagingBuffer::IOEngineConstraints const StagingBuffer::IOEngineConstraints
StagingBuffer::IOEngineConstraints::ioUringConstraints( StagingBuffer::IOEngineConstraints::ioUringConstraints(
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)), // slotStartAlignmentByteVal (page alignment for DMA) // slotStartAlignmentByteVal (page alignment for DMA)
1472 // slotPadToNBytes (MTU 1500 - UDP/IP header 28) static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// slotPadToNBytes (MTU 1500 - UDP/IP header 28)
1472,
// frameStartAlignmentByteVal (page alignment for DMA)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// framePadToNBytes (one page)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
); );
// Static defaults for OpenCL input // Static defaults for OpenCL input
const StagingBuffer::IOEngineConstraints const StagingBuffer::IOEngineConstraints
StagingBuffer::IOEngineConstraints::openClInputConstraints( StagingBuffer::IOEngineConstraints::openClInputConstraints(
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)), // slotStartAlignmentByteVal (page alignment) // slotStartAlignmentByteVal (page alignment)
sizeof(void *) // slotPadToNBytes (pointer size) static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// slotPadToNBytes (pointer size)
sizeof(void *),
// frameStartAlignmentByteVal (page alignment)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
// framePadToNBytes (one page)
static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
); );
// Helper: alignment required for the first slot of a frame.
//
// The first slot is also the start of the frame, so its address must be a
// multiple of BOTH the frame-start and the slot-start alignment. The smallest
// value with that property is their least common multiple (LCM). For the usual
// power-of-two alignments the LCM equals the larger of the two values.
//
// @param frameStartAlignmentByteVal  required alignment of the frame start
// @param slotStartAlignmentByteVal   required alignment of each slot start
// @return the LCM of the two alignments; an alignment of 0 is treated as
//         "no constraint" (i.e. 1), so the result is always >= 1 and can be
//         used safely as a divisor.
//
// NOTE(review): the final multiplication is not overflow-checked; this assumes
// realistic (page-sized or smaller) alignments.
static size_t calculateMaxAlignment(
    size_t frameStartAlignmentByteVal,
    size_t slotStartAlignmentByteVal)
{
    // Normalise "no alignment requested" to 1 so that the modulo/division
    // below never divide by zero.
    const size_t frameAlign =
        (frameStartAlignmentByteVal == 0) ? 1 : frameStartAlignmentByteVal;
    const size_t slotAlign =
        (slotStartAlignmentByteVal == 0) ? 1 : slotStartAlignmentByteVal;

    // Greatest common divisor via Euclid's algorithm.
    size_t gcd = frameAlign;
    size_t remainder = slotAlign;
    while (remainder != 0)
    {
        const size_t next = gcd % remainder;
        gcd = remainder;
        remainder = next;
    }

    // LCM = a / gcd(a, b) * b; divide first to keep the intermediate small.
    return (frameAlign / gcd) * slotAlign;
}
// Derives slotStrideNBytes and bufferNBytes from nDgramsPerFrame and
// inputConstraints.
//
// - slotStrideNBytes: distance between consecutive slot starts. It is at
//   least slotPadToNBytes and always a multiple of the slot alignment, so
//   that every slot (not only the first) starts on an aligned address.
// - bufferNBytes: room for all slots plus worst-case leading padding needed
//   to align the first slot, never smaller than the frame/slot minimum size,
//   rounded up to a multiple of the first-slot alignment.
//
// An alignment of 0 is treated as "no constraint" (1).
//
// NOTE(review): the size arithmetic is not overflow-checked; it assumes
// nDgramsPerFrame * slotStrideNBytes fits in size_t.
void StagingBuffer::computeSlotStrideAndBufferSize()
{
    // Guard against a zero alignment so the round-up below cannot divide by 0.
    const size_t slotAlign = std::max<size_t>(
        inputConstraints.slotStartAlignmentByteVal, 1);

    // Each slot must hold at least slotPadToNBytes (and at least one
    // alignment unit); round that up to a multiple of the slot alignment.
    // Using a plain max() here would misalign every slot after the first
    // whenever slotPadToNBytes is larger than, but not a multiple of, the
    // alignment.
    const size_t minSlotNBytes = std::max(
        slotAlign, inputConstraints.slotPadToNBytes);
    slotStrideNBytes =
        ((minSlotNBytes + slotAlign - 1) / slotAlign) * slotAlign;

    // Alignment the first slot must honour (frame start AND slot start).
    const size_t maxAlignment = std::max<size_t>(
        calculateMaxAlignment(
            inputConstraints.frameStartAlignmentByteVal,
            inputConstraints.slotStartAlignmentByteVal),
        1);

    // Lower bound on the buffer size imposed by the padding constraints.
    const size_t minBufferSize = std::max(
        inputConstraints.framePadToNBytes,
        inputConstraints.slotPadToNBytes);

    // Space taken by the slots themselves.
    const size_t slotAreaSize = nDgramsPerFrame * slotStrideNBytes;

    // Worst case, the buffer base is 1 byte past an aligned address, so up
    // to (maxAlignment - 1) leading bytes are skipped to reach the first slot.
    const size_t alignmentPadding = maxAlignment - 1;

    // Leading padding + slots, but never below the required minimum.
    const size_t rawSize = std::max(
        alignmentPadding + slotAreaSize, minBufferSize);

    // Round up to a whole number of alignment units.
    bufferNBytes = ((rawSize + maxAlignment - 1) / maxAlignment) * maxAlignment;
}
// Static member function: computes the byte offset from `buffer` to the first
// slot and validates the layout invariants.
//
// @param buffer            base address of the allocated staging buffer
// @param bufferNBytes      total size of the allocation in bytes
// @param nDgramsPerFrame   number of slots that must fit in the buffer
// @param slotStrideNBytes  distance in bytes between consecutive slot starts
// @param inputConstraints  frame/slot alignment and padding requirements
// @return offset (in bytes) from `buffer` to the first, aligned slot
// @throws std::runtime_error if the first slot is not aligned to the frame or
//         slot alignment, if the buffer is smaller than the larger of
//         framePadToNBytes / slotPadToNBytes, or if the slots do not fit in
//         the buffer after the alignment offset.
//
// An alignment of 0 is treated as "no constraint".
size_t StagingBuffer::calculateFirstSlotOffsetAndValidate(
    uint8_t* buffer,
    size_t bufferNBytes,
    size_t nDgramsPerFrame,
    size_t slotStrideNBytes,
    const StagingBuffer::IOEngineConstraints& inputConstraints)
{
    const size_t frameAlign = inputConstraints.frameStartAlignmentByteVal;
    const size_t slotAlign = inputConstraints.slotStartAlignmentByteVal;

    // Alignment the first slot must honour (frame start AND slot start).
    size_t maxAlignment = calculateMaxAlignment(frameAlign, slotAlign);
    if (maxAlignment == 0)
    { maxAlignment = 1; } // never divide by zero below

    // Round the base address up to the next multiple of maxAlignment; the
    // distance moved is the offset of the first slot.
    const uintptr_t bufferAddr = reinterpret_cast<uintptr_t>(buffer);
    const uintptr_t firstSlotAddr =
        ((bufferAddr + maxAlignment - 1) / maxAlignment) * maxAlignment;
    const size_t firstSlotOffsetNBytes = firstSlotAddr - bufferAddr;

    // Invariant: first slot satisfies the frame-start alignment.
    if (frameAlign != 0 && firstSlotAddr % frameAlign != 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: first slot address not aligned to "
            + std::to_string(inputConstraints.frameStartAlignmentByteVal));
    }

    // Invariant: first slot satisfies the slot-start alignment.
    if (slotAlign != 0 && firstSlotAddr % slotAlign != 0)
    {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: first slot address not aligned to "
            + std::to_string(inputConstraints.slotStartAlignmentByteVal));
    }

    // Invariant: buffer honours the minimum size from the padding constraints.
    const size_t minBufferSize = std::max(
        inputConstraints.framePadToNBytes,
        inputConstraints.slotPadToNBytes);
    if (bufferNBytes < minBufferSize)
    {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: buffer size less than minimum required (max of "
            + std::to_string(inputConstraints.framePadToNBytes)
            + " and "
            + std::to_string(inputConstraints.slotPadToNBytes)
            + ")");
    }

    // Invariant: offset + nDgramsPerFrame * slotStrideNBytes <= bufferNBytes.
    // Expressed with a division so that a huge slot count or stride cannot
    // wrap around size_t and slip past the check.
    const bool slotsFit =
        firstSlotOffsetNBytes <= bufferNBytes
        && (slotStrideNBytes == 0
            || nDgramsPerFrame
                <= (bufferNBytes - firstSlotOffsetNBytes) / slotStrideNBytes);
    if (!slotsFit)
    {
        throw std::runtime_error(std::string(__func__)
            + ": StagingBuffer: buffer size insufficient to hold "
            + std::to_string(nDgramsPerFrame)
            + " slots with proper alignment and padding");
    }

    return firstSlotOffsetNBytes;
}
} // namespace stim_buff } // namespace stim_buff
} // namespace smo } // namespace smo
+24 -19
View File
@@ -36,15 +36,19 @@ public:
public: public:
IOEngineConstraints( IOEngineConstraints(
size_t slotStartAlignmentByteVal_, size_t slotStartAlignmentByteVal_,
size_t slotPadToNBytes_) size_t slotPadToNBytes_,
size_t frameStartAlignmentByteVal_,
size_t framePadToNBytes_)
: slotStartAlignmentByteVal(slotStartAlignmentByteVal_), : slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
slotPadToNBytes(slotPadToNBytes_) slotPadToNBytes(slotPadToNBytes_),
frameStartAlignmentByteVal(frameStartAlignmentByteVal_),
framePadToNBytes(framePadToNBytes_)
{} {}
~IOEngineConstraints() = default; ~IOEngineConstraints() = default;
size_t slotStartAlignmentByteVal; // power-of-2 alignment (e.g., 4096) size_t slotStartAlignmentByteVal, slotPadToNBytes,
size_t slotPadToNBytes; // minimum size per datagram slot frameStartAlignmentByteVal, framePadToNBytes;
// Static defaults for io_uring and OpenCL // Static defaults for io_uring and OpenCL
static const IOEngineConstraints ioUringConstraints; static const IOEngineConstraints ioUringConstraints;
@@ -56,6 +60,8 @@ public:
oss << "IOEngineConstraints{" oss << "IOEngineConstraints{"
<< "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal << "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
<< ", slotPadToNBytes=" << slotPadToNBytes << ", slotPadToNBytes=" << slotPadToNBytes
<< ", frameStartAlignmentByteVal=" << frameStartAlignmentByteVal
<< ", framePadToNBytes=" << framePadToNBytes
<< "}"; << "}";
return oss.str(); return oss.str();
} }
@@ -118,6 +124,12 @@ public:
private: private:
void computeSlotStrideAndBufferSize(); void computeSlotStrideAndBufferSize();
static size_t calculateFirstSlotOffsetAndValidate(
uint8_t* buffer,
size_t bufferNBytes,
size_t nDgramsPerFrame,
size_t slotStrideNBytes,
const IOEngineConstraints& inputConstraints);
// Custom deleter for mmap-allocated buffer // Custom deleter for mmap-allocated buffer
struct MmapDeleter struct MmapDeleter
@@ -143,6 +155,7 @@ private:
// Layout/invariants // Layout/invariants
size_t nDgramsPerFrame; size_t nDgramsPerFrame;
size_t slotStrideNBytes; size_t slotStrideNBytes;
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
IOEngineConstraints inputConstraints; IOEngineConstraints inputConstraints;
// Descriptor (computed once; reused across frames) // Descriptor (computed once; reused across frames)
@@ -162,6 +175,7 @@ inline StagingBuffer::StagingBuffer(
size_t nDgramsPerFrame) size_t nDgramsPerFrame)
: buffer(nullptr, MmapDeleter(0)), bufferNBytes(0), : buffer(nullptr, MmapDeleter(0)), bufferNBytes(0),
nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0), nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0),
firstSlotOffsetNBytes(0),
inputConstraints(inputEngineConstraints_), inputConstraints(inputEngineConstraints_),
assemblingFlag(false) assemblingFlag(false)
{ {
@@ -192,10 +206,15 @@ assemblingFlag(false)
static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes)); static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));
currentNBytes.store(0); currentNBytes.store(0);
// Calculate offset and validate invariants (helper function in .cpp)
firstSlotOffsetNBytes = StagingBuffer::calculateFirstSlotOffsetAndValidate(
buffer.get(), bufferNBytes, nDgramsPerFrame,
slotStrideNBytes, inputConstraints);
// Build FrameAssemblyDesc once // Build FrameAssemblyDesc once
std::vector<FrameAssemblyDesc::SlotDesc> slots; std::vector<FrameAssemblyDesc::SlotDesc> slots;
slots.reserve(nDgramsPerFrame); slots.reserve(nDgramsPerFrame);
uint8_t *frameBase = buffer.get(); uint8_t *frameBase = buffer.get() + firstSlotOffsetNBytes;
for (size_t i = 0; i < nDgramsPerFrame; ++i) for (size_t i = 0; i < nDgramsPerFrame; ++i)
{ {
size_t off = i * slotStrideNBytes; size_t off = i * slotStrideNBytes;
@@ -210,20 +229,6 @@ assemblingFlag(false)
std::move(slots)); std::move(slots));
} }
// Computes slotStrideNBytes and bufferNBytes from nDgramsPerFrame and the
// slot constraints in inputConstraints (frame constraints are not consulted).
// NOTE(review): this inline definition appears to be the pre-change version
// shown on the removed side of the diff hunk — confirm before relying on it.
inline void StagingBuffer::computeSlotStrideAndBufferSize()
{
// Slot stride is the maximum of alignment and padding
// NOTE(review): max() yields a multiple of the alignment only when
// slotPadToNBytes <= alignment or is itself a multiple of it; otherwise
// slots after the first would not start on an aligned address — confirm.
slotStrideNBytes = std::max(
inputConstraints.slotStartAlignmentByteVal,
inputConstraints.slotPadToNBytes);
// Buffer size is nDgramsPerFrame * slotStrideNBytes, aligned up to alignment
size_t rawSize = nDgramsPerFrame * slotStrideNBytes;
// Round rawSize up to the next multiple of slotStartAlignmentByteVal
// (divides by the alignment, so a zero alignment is not supported here).
bufferNBytes = ((rawSize + inputConstraints.slotStartAlignmentByteVal - 1)
/ inputConstraints.slotStartAlignmentByteVal)
* inputConstraints.slotStartAlignmentByteVal;
}
} // namespace stim_buff } // namespace stim_buff
} // namespace smo } // namespace smo