StagingBuff: Enhance IoConstraints with frame constraints

Now StagingBuff instances must meet both frame and slot
constraints.
This commit is contained in:
2025-11-08 00:15:29 -04:00
parent 5f11a9d6c7
commit 7497f2fd95
2 changed files with 168 additions and 23 deletions
+24 -19
View File
@@ -36,15 +36,19 @@ public:
public:
IOEngineConstraints(
size_t slotStartAlignmentByteVal_,
size_t slotPadToNBytes_)
size_t slotPadToNBytes_,
size_t frameStartAlignmentByteVal_,
size_t framePadToNBytes_)
: slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
slotPadToNBytes(slotPadToNBytes_)
slotPadToNBytes(slotPadToNBytes_),
frameStartAlignmentByteVal(frameStartAlignmentByteVal_),
framePadToNBytes(framePadToNBytes_)
{}
~IOEngineConstraints() = default;
size_t slotStartAlignmentByteVal; // power-of-2 alignment (e.g., 4096)
size_t slotPadToNBytes; // minimum size per datagram slot
size_t slotStartAlignmentByteVal, slotPadToNBytes,
frameStartAlignmentByteVal, framePadToNBytes;
// Static defaults for io_uring and OpenCL
static const IOEngineConstraints ioUringConstraints;
@@ -56,6 +60,8 @@ public:
oss << "IOEngineConstraints{"
<< "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal
<< ", slotPadToNBytes=" << slotPadToNBytes
<< ", frameStartAlignmentByteVal=" << frameStartAlignmentByteVal
<< ", framePadToNBytes=" << framePadToNBytes
<< "}";
return oss.str();
}
@@ -118,6 +124,12 @@ public:
private:
void computeSlotStrideAndBufferSize();
static size_t calculateFirstSlotOffsetAndValidate(
uint8_t* buffer,
size_t bufferNBytes,
size_t nDgramsPerFrame,
size_t slotStrideNBytes,
const IOEngineConstraints& inputConstraints);
// Custom deleter for mmap-allocated buffer
struct MmapDeleter
@@ -143,6 +155,7 @@ private:
// Layout/invariants
size_t nDgramsPerFrame;
size_t slotStrideNBytes;
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
IOEngineConstraints inputConstraints;
// Descriptor (computed once; reused across frames)
@@ -162,6 +175,7 @@ inline StagingBuffer::StagingBuffer(
size_t nDgramsPerFrame)
: buffer(nullptr, MmapDeleter(0)), bufferNBytes(0),
nDgramsPerFrame(nDgramsPerFrame), slotStrideNBytes(0),
firstSlotOffsetNBytes(0),
inputConstraints(inputEngineConstraints_),
assemblingFlag(false)
{
@@ -192,10 +206,15 @@ assemblingFlag(false)
static_cast<uint8_t*>(mmapped), MmapDeleter(bufferNBytes));
currentNBytes.store(0);
// Calculate offset and validate invariants (helper function in .cpp)
firstSlotOffsetNBytes = StagingBuffer::calculateFirstSlotOffsetAndValidate(
buffer.get(), bufferNBytes, nDgramsPerFrame,
slotStrideNBytes, inputConstraints);
// Build FrameAssemblyDesc once
std::vector<FrameAssemblyDesc::SlotDesc> slots;
slots.reserve(nDgramsPerFrame);
uint8_t *frameBase = buffer.get();
uint8_t *frameBase = buffer.get() + firstSlotOffsetNBytes;
for (size_t i = 0; i < nDgramsPerFrame; ++i)
{
size_t off = i * slotStrideNBytes;
@@ -210,20 +229,6 @@ assemblingFlag(false)
std::move(slots));
}
/**
 * @brief Derive slotStrideNBytes and bufferNBytes from inputConstraints
 *        and nDgramsPerFrame.
 *
 * The stride is the smallest multiple of
 * inputConstraints.slotStartAlignmentByteVal that is at least
 * inputConstraints.slotPadToNBytes (and at least one alignment unit).
 * Keeping the stride a multiple of the alignment means that, when the first
 * slot starts on an aligned address, every following slot does too. The
 * previous max(alignment, pad) formula broke this whenever the pad was
 * larger than, and not a multiple of, the alignment.
 *
 * Writes: slotStrideNBytes, bufferNBytes.
 * Reads:  inputConstraints, nDgramsPerFrame.
 *
 * NOTE(review): nDgramsPerFrame * slotStrideNBytes is not checked for
 * size_t overflow — confirm callers bound these values.
 */
inline void StagingBuffer::computeSlotStrideAndBufferSize()
{
    // A zero alignment would divide by zero below; treat it as
    // "no alignment requirement" (alignment of one byte).
    const size_t alignment = std::max<size_t>(
        inputConstraints.slotStartAlignmentByteVal, 1);

    // Each slot must hold at least slotPadToNBytes; never let it be empty,
    // so that a zero pad still yields one full alignment unit per slot.
    const size_t minSlotNBytes = std::max<size_t>(
        inputConstraints.slotPadToNBytes, 1);

    // Round the minimum slot size up to the next multiple of the alignment.
    slotStrideNBytes =
        ((minSlotNBytes + alignment - 1) / alignment) * alignment;

    // The stride is already a multiple of the alignment, so the total is too
    // and needs no further rounding.
    bufferNBytes = nDgramsPerFrame * slotStrideNBytes;
}
} // namespace stim_buff
} // namespace smo