StagingBuff: Enhance IoConstraints with frame constraints
Now StagingBuff instances must meet both frame and slot constraints.
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
#include "stagingBuffer.h"
|
#include "stagingBuffer.h"
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
namespace smo {
|
namespace smo {
|
||||||
namespace stim_buff {
|
namespace stim_buff {
|
||||||
@@ -7,16 +9,154 @@ namespace stim_buff {
|
|||||||
// Static defaults for io_uring
|
// Static defaults for io_uring
|
||||||
const StagingBuffer::IOEngineConstraints
|
const StagingBuffer::IOEngineConstraints
|
||||||
StagingBuffer::IOEngineConstraints::ioUringConstraints(
|
StagingBuffer::IOEngineConstraints::ioUringConstraints(
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)), // slotStartAlignmentByteVal (page alignment for DMA)
|
// slotStartAlignmentByteVal (page alignment for DMA)
|
||||||
1472 // slotPadToNBytes (MTU 1500 - UDP/IP header 28)
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
|
// slotPadToNBytes (MTU 1500 - UDP/IP header 28)
|
||||||
|
1472,
|
||||||
|
// frameStartAlignmentByteVal (page alignment for DMA)
|
||||||
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
|
// framePadToNBytes (page size)
|
||||||
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
|
||||||
);
|
);
|
||||||
|
|
||||||
// Static defaults for OpenCL input
|
// Static defaults for OpenCL input
|
||||||
const StagingBuffer::IOEngineConstraints
|
const StagingBuffer::IOEngineConstraints
|
||||||
StagingBuffer::IOEngineConstraints::openClInputConstraints(
|
StagingBuffer::IOEngineConstraints::openClInputConstraints(
|
||||||
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)), // slotStartAlignmentByteVal (page alignment)
|
// slotStartAlignmentByteVal (page alignment)
|
||||||
sizeof(void *) // slotPadToNBytes (pointer size)
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
|
// slotPadToNBytes (pointer size)
|
||||||
|
sizeof(void *),
|
||||||
|
// frameStartAlignmentByteVal (page alignment)
|
||||||
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE)),
|
||||||
|
// framePadToNBytes (page size)
|
||||||
|
static_cast<size_t>(sysconf(_SC_PAGE_SIZE))
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Helper function to calculate the alignment needed for the first slot.
//
// The first slot must satisfy both the frame and the slot start alignment,
// so the result is the least common multiple (LCM) of the two values: any
// address aligned to the LCM is aligned to both inputs. When one value
// divides the other (the usual power-of-2 case) the LCM is simply the larger
// of the two.
//
// @param frameStartAlignmentByteVal  frame start alignment in bytes (non-zero)
// @param slotStartAlignmentByteVal   slot start alignment in bytes (non-zero)
// @return the least common multiple of the two alignments
// @throws std::runtime_error if either alignment is zero, or if the LCM does
//         not fit in size_t
static size_t calculateMaxAlignment(
    size_t frameStartAlignmentByteVal,
    size_t slotStartAlignmentByteVal)
{
    // A zero alignment is meaningless and would lead to a division by zero.
    if (frameStartAlignmentByteVal == 0 || slotStartAlignmentByteVal == 0)
    {
        throw std::runtime_error(
            "calculateMaxAlignment: alignment values must be non-zero");
    }

    // Euclid's algorithm for the greatest common divisor.
    size_t gcd = frameStartAlignmentByteVal;
    size_t remainder = slotStartAlignmentByteVal;
    while (remainder != 0)
    {
        const size_t next = gcd % remainder;
        gcd = remainder;
        remainder = next;
    }

    // lcm = (frame / gcd) * slot. Dividing first keeps the intermediate value
    // small; the explicit check rejects results that would wrap around.
    const size_t frameFactor = frameStartAlignmentByteVal / gcd;
    if (frameFactor > SIZE_MAX / slotStartAlignmentByteVal)
    {
        throw std::runtime_error(
            "calculateMaxAlignment: combined alignment overflows size_t");
    }

    return frameFactor * slotStartAlignmentByteVal;
}
|
||||||
|
|
||||||
|
void StagingBuffer::computeSlotStrideAndBufferSize()
|
||||||
|
{
|
||||||
|
// Slot stride is the maximum of alignment and padding
|
||||||
|
slotStrideNBytes = std::max(
|
||||||
|
inputConstraints.slotStartAlignmentByteVal,
|
||||||
|
inputConstraints.slotPadToNBytes);
|
||||||
|
|
||||||
|
// Calculate maximum alignment needed for first slot (must satisfy both frame and slot alignment)
|
||||||
|
size_t maxAlignment = calculateMaxAlignment(
|
||||||
|
inputConstraints.frameStartAlignmentByteVal,
|
||||||
|
inputConstraints.slotStartAlignmentByteVal);
|
||||||
|
|
||||||
|
// Calculate minimum buffer size
|
||||||
|
size_t minBufferSize = std::max(
|
||||||
|
inputConstraints.framePadToNBytes,
|
||||||
|
inputConstraints.slotPadToNBytes);
|
||||||
|
|
||||||
|
// Calculate total size needed for nDgramsPerFrame slots
|
||||||
|
size_t slotAreaSize = nDgramsPerFrame * slotStrideNBytes;
|
||||||
|
|
||||||
|
// Add padding space at buffer start for alignment offset (worst case: max alignment - 1)
|
||||||
|
size_t alignmentPadding = maxAlignment - 1;
|
||||||
|
|
||||||
|
// Total size needed: alignment padding + slot area, then ensure minimum is met
|
||||||
|
size_t rawSize = alignmentPadding + slotAreaSize;
|
||||||
|
if (rawSize < minBufferSize)
|
||||||
|
{ rawSize = minBufferSize; }
|
||||||
|
|
||||||
|
// Align up to the maximum alignment to ensure we can always find a valid offset
|
||||||
|
bufferNBytes = ((rawSize + maxAlignment - 1) / maxAlignment) * maxAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static member function to calculate offset and validate invariants
|
||||||
|
size_t StagingBuffer::calculateFirstSlotOffsetAndValidate(
|
||||||
|
uint8_t* buffer,
|
||||||
|
size_t bufferNBytes,
|
||||||
|
size_t nDgramsPerFrame,
|
||||||
|
size_t slotStrideNBytes,
|
||||||
|
const StagingBuffer::IOEngineConstraints& inputConstraints)
|
||||||
|
{
|
||||||
|
// Calculate maximum alignment needed for first slot
|
||||||
|
size_t maxAlignment = calculateMaxAlignment(
|
||||||
|
inputConstraints.frameStartAlignmentByteVal,
|
||||||
|
inputConstraints.slotStartAlignmentByteVal);
|
||||||
|
|
||||||
|
// Calculate offset to align first slot to both frame and slot alignment
|
||||||
|
uintptr_t bufferAddr = reinterpret_cast<uintptr_t>(buffer);
|
||||||
|
uintptr_t alignedAddr = ((bufferAddr + maxAlignment - 1) / maxAlignment)
|
||||||
|
* maxAlignment;
|
||||||
|
size_t firstSlotOffsetNBytes = alignedAddr - bufferAddr;
|
||||||
|
|
||||||
|
// Validate invariants with exceptions
|
||||||
|
uint8_t* firstSlotAddr = buffer + firstSlotOffsetNBytes;
|
||||||
|
if (
|
||||||
|
reinterpret_cast<uintptr_t>(firstSlotAddr)
|
||||||
|
% inputConstraints.frameStartAlignmentByteVal != 0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string(__func__)
|
||||||
|
+ ": StagingBuffer: first slot address not aligned to "
|
||||||
|
+ std::to_string(inputConstraints.frameStartAlignmentByteVal));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
reinterpret_cast<uintptr_t>(firstSlotAddr)
|
||||||
|
% inputConstraints.slotStartAlignmentByteVal != 0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string(__func__)
|
||||||
|
+ ": StagingBuffer: first slot address not aligned to "
|
||||||
|
+ std::to_string(inputConstraints.slotStartAlignmentByteVal));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t minBufferSize = std::max(
|
||||||
|
inputConstraints.framePadToNBytes,
|
||||||
|
inputConstraints.slotPadToNBytes);
|
||||||
|
if (bufferNBytes < minBufferSize)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string(__func__)
|
||||||
|
+ ": StagingBuffer: buffer size less than minimum required (max of "
|
||||||
|
+ std::to_string(inputConstraints.framePadToNBytes)
|
||||||
|
+ " and "
|
||||||
|
+ std::to_string(inputConstraints.slotPadToNBytes)
|
||||||
|
+ ")");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstSlotOffsetNBytes + nDgramsPerFrame * slotStrideNBytes
|
||||||
|
> bufferNBytes)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string(__func__)
|
||||||
|
+ ": StagingBuffer: buffer size insufficient to hold "
|
||||||
|
+ std::to_string(nDgramsPerFrame)
|
||||||
|
+ " slots with proper alignment and padding");
|
||||||
|
}
|
||||||
|
|
||||||
|
return firstSlotOffsetNBytes;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
} // namespace smo
|
} // namespace smo
|
||||||
|
|||||||
@@ -36,15 +36,19 @@ public:
|
|||||||
public:
|
public:
|
||||||
IOEngineConstraints(
|
IOEngineConstraints(
|
||||||
size_t slotStartAlignmentByteVal_,
|
size_t slotStartAlignmentByteVal_,
|
||||||
size_t slotPadToNBytes_)
|
size_t slotPadToNBytes_,
|
||||||
|
size_t frameStartAlignmentByteVal_,
|
||||||
|
size_t framePadToNBytes_)
|
||||||
: slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
|
: slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
|
||||||
slotPadToNBytes(slotPadToNBytes_)
|
slotPadToNBytes(slotPadToNBytes_),
|
||||||
|
frameStartAlignmentByteVal(frameStartAlignmentByteVal_),
|
||||||
|
framePadToNBytes(framePadToNBytes_)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
~IOEngineConstraints() = default;
|
~IOEngineConstraints() = default;
|
||||||
|
|
||||||
size_t slotStartAlignmentByteVal; // power-of-2 alignment (e.g., 4096)
|
size_t slotStartAlignmentByteVal, slotPadToNBytes,
|
||||||
size_t slotPadToNBytes; // minimum size per datagram slot
|
frameStartAlignmentByteVal, framePadToNBytes;
|
||||||
|
|
||||||
// Static defaults for io_uring and OpenCL
|
// Static defaults for io_uring and OpenCL
|
||||||
static const IOEngineConstraints ioUringConstraints;
|
static const IOEngineConstraints ioUringConstraints;
|
||||||
@@ -56,6 +60,8 @@ public:
|
|||||||
oss << "IOEngineConstraints{"
|
oss << "IOEngineConstraints{"
|
||||||
<< "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
|
<< "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
|
||||||
<< ", slotPadToNBytes=" << slotPadToNBytes
|
<< ", slotPadToNBytes=" << slotPadToNBytes
|
||||||
|
<< ", frameStartAlignmentByteVal=" << frameStartAlignmentByteVal
|
||||||
|
<< ", framePadToNBytes=" << framePadToNBytes
|
||||||
<< "}";
|
<< "}";
|
||||||
return oss.str();
|
return oss.str();
|
||||||
}
|
}
|
||||||
@@ -118,6 +124,12 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void computeSlotStrideAndBufferSize();
|
void computeSlotStrideAndBufferSize();
|
||||||
|
static size_t calculateFirstSlotOffsetAndValidate(
|
||||||
|
uint8_t* buffer,
|
||||||
|
size_t bufferNBytes,
|
||||||
|
size_t nDgramsPerFrame,
|
||||||
|
size_t slotStrideNBytes,
|
||||||
|
const IOEngineConstraints& inputConstraints);
|
||||||
|
|
||||||
// Custom deleter for mmap-allocated buffer
|
// Custom deleter for mmap-allocated buffer
|
||||||
struct MmapDeleter
|
struct MmapDeleter
|
||||||
@@ -143,6 +155,7 @@ private:
|
|||||||
// Layout/invariants
|
// Layout/invariants
|
||||||
size_t nDgramsPerFrame;
|
size_t nDgramsPerFrame;
|
||||||
size_t slotStrideNBytes;
|
size_t slotStrideNBytes;
|
||||||
|
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
|
||||||
IOEngineConstraints inputConstraints;
|
IOEngineConstraints inputConstraints;
|
||||||
|
|
||||||
// Descriptor (computed once; reused across frames)
|
// Descriptor (computed once; reused across frames)
|
||||||
@@ -162,6 +175,7 @@ inline StagingBuffer::StagingBuffer(
|
|||||||
size_t nDgramsPerFrame)
|
size_t nDgramsPerFrame)
|
||||||
: buffer(nullptr, MmapDeleter(0)), bufferNBytes(0),
|
: buffer(nullptr, MmapDeleter(0)), bufferNBytes(0),
|
||||||
nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0),
|
nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0),
|
||||||
|
firstSlotOffsetNBytes(0),
|
||||||
inputConstraints(inputEngineConstraints_),
|
inputConstraints(inputEngineConstraints_),
|
||||||
assemblingFlag(false)
|
assemblingFlag(false)
|
||||||
{
|
{
|
||||||
@@ -192,10 +206,15 @@ assemblingFlag(false)
|
|||||||
static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));
|
static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));
|
||||||
currentNBytes.store(0);
|
currentNBytes.store(0);
|
||||||
|
|
||||||
|
// Calculate offset and validate invariants (helper function in .cpp)
|
||||||
|
firstSlotOffsetNBytes = StagingBuffer::calculateFirstSlotOffsetAndValidate(
|
||||||
|
buffer.get(), bufferNBytes, nDgramsPerFrame,
|
||||||
|
slotStrideNBytes, inputConstraints);
|
||||||
|
|
||||||
// Build FrameAssemblyDesc once
|
// Build FrameAssemblyDesc once
|
||||||
std::vector<FrameAssemblyDesc::SlotDesc> slots;
|
std::vector<FrameAssemblyDesc::SlotDesc> slots;
|
||||||
slots.reserve(nDgramsPerFrame);
|
slots.reserve(nDgramsPerFrame);
|
||||||
uint8_t *frameBase = buffer.get();
|
uint8_t *frameBase = buffer.get() + firstSlotOffsetNBytes;
|
||||||
for (size_t i = 0; i < nDgramsPerFrame; ++i)
|
for (size_t i = 0; i < nDgramsPerFrame; ++i)
|
||||||
{
|
{
|
||||||
size_t off = i * slotStrideNBytes;
|
size_t off = i * slotStrideNBytes;
|
||||||
@@ -210,20 +229,6 @@ assemblingFlag(false)
|
|||||||
std::move(slots));
|
std::move(slots));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void StagingBuffer::computeSlotStrideAndBufferSize()
|
|
||||||
{
|
|
||||||
// Slot stride is the maximum of alignment and padding
|
|
||||||
slotStrideNBytes = std::max(
|
|
||||||
inputConstraints.slotStartAlignmentByteVal,
|
|
||||||
inputConstraints.slotPadToNBytes);
|
|
||||||
|
|
||||||
// Buffer size is nDgramsPerFrame * slotStrideNBytes, aligned up to alignment
|
|
||||||
size_t rawSize = nDgramsPerFrame * slotStrideNBytes;
|
|
||||||
bufferNBytes = ((rawSize + inputConstraints.slotStartAlignmentByteVal - 1)
|
|
||||||
/ inputConstraints.slotStartAlignmentByteVal)
|
|
||||||
* inputConstraints.slotStartAlignmentByteVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
} // namespace smo
|
} // namespace smo
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user