StagingBuff: support both Mlock & IOUring pin; Use in IoUAssmEngn

We now use io_uring_register_buffers() for IoUringAssemblyEngine instead
of mlock(). This __appears__ to have reduced CPU utilization on
the Dell laptop, although the drop could also be due to the recent
upgrade of total RAM from 8GiB to 32GiB.
This commit is contained in:
2026-04-02 03:51:22 -04:00
parent 26dd686ebf
commit 1d64ce0c7e
11 changed files with 257 additions and 61 deletions
+75 -8
View File
@@ -15,6 +15,14 @@ namespace stim_buff {
// Forward declaration
class FrameAssemblyDesc;
} // namespace stim_buff
} // namespace smo
struct io_uring;
namespace smo {
namespace stim_buff {
/**
* StagingBuffer manages a large buffer to guide io_uring in assembling some
* number of Livox Avia pcloud UDP dgrams into a single stim frame.
@@ -28,6 +36,13 @@ class FrameAssemblyDesc;
class StagingBuffer
{
public:
/**
 * Names the mechanism used to pin the staging buffer's memory.
 *
 * NOTE(review): the per-enumerator descriptions below are inferred from the
 * enumerator names and the accompanying commit message -- confirm against
 * the implementation file.
 */
enum class PinningMechanism
{
    // No pinning mechanism selected.
    NONE,

    // Presumably: buffer pinned via mlock().
    MLOCK,

    // Presumably: buffer pinned via io_uring buffer registration.
    IO_URING
};
class IOEngineConstraints
{
public:
@@ -83,7 +98,7 @@ public:
const IOEngineConstraints& inputEngineConstraints,
const IOEngineConstraints& outputEngineConstraints,
size_t nSlots);
~StagingBuffer() = default;
~StagingBuffer();
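// Non-copyable.
// NOTE(review): the move operations are declared `= default`, but this class
// holds std::atomic members, which are neither copyable nor movable, so the
// defaulted move operations end up deleted. Confirm whether movability is
// actually required.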
StagingBuffer(const StagingBuffer&) = delete;
@@ -91,6 +106,50 @@ public:
StagingBuffer(StagingBuffer&&) = default;
StagingBuffer& operator=(StagingBuffer&&) = default;
/**
 * Common base of the pinner helper classes.
 *
 * Keeps a reference to a StagingBuffer. Construction and destruction are
 * protected, so a Pinner can only exist as the base sub-object of a derived
 * class and cannot be deleted through a Pinner pointer. Copying and moving
 * are disabled.
 */
class Pinner
{
protected:
    explicit Pinner(StagingBuffer& parent_);
    ~Pinner() = default;

    // The StagingBuffer this pinner refers to.
    StagingBuffer& parent;

public:
    // Neither copyable nor movable.
    Pinner(Pinner&&) = delete;
    Pinner(const Pinner&) = delete;
    Pinner& operator=(Pinner&&) = delete;
    Pinner& operator=(const Pinner&) = delete;
};
/**
 * Pinner specialisation constructed from a StagingBuffer alone.
 *
 * NOTE(review): presumably the constructor mlock()s the parent's buffer and
 * the destructor munlock()s it; both are defined out of line -- confirm
 * against the implementation file.
 */
class MlockPinner : public Pinner
{
public:
    explicit MlockPinner(StagingBuffer& parent);
    ~MlockPinner();

    // Neither copyable nor movable.
    MlockPinner(MlockPinner&&) = delete;
    MlockPinner(const MlockPinner&) = delete;
    MlockPinner& operator=(MlockPinner&&) = delete;
    MlockPinner& operator=(const MlockPinner&) = delete;
};
/**
 * Pinner specialisation constructed from a StagingBuffer and an io_uring
 * instance.
 *
 * NOTE(review): presumably the constructor registers the parent's buffer
 * with the given ring and the destructor unregisters it; both are defined
 * out of line -- confirm against the implementation file.
 */
class IoUringPinner : public Pinner
{
private:
    // Ring pointer kept by this pinner; ownership is not visible from here.
    struct io_uring* ring;

public:
    IoUringPinner(StagingBuffer& parent, struct io_uring* ring);
    ~IoUringPinner();

    // Neither copyable nor movable.
    IoUringPinner(IoUringPinner&&) = delete;
    IoUringPinner(const IoUringPinner&) = delete;
    IoUringPinner& operator=(IoUringPinner&&) = delete;
    IoUringPinner& operator=(const IoUringPinner&) = delete;
};
public:
/** EXPLANATION:
* Returns an input-engine-agnostic descriptor describing per-frame packet
@@ -104,6 +163,9 @@ public:
/** Sets the atomic assembling flag to true. */
void startAssembly()
{
    assemblingFlag.store(true);
}
/** Sets the atomic assembling flag to false. */
void stopAssembly()
{
    assemblingFlag.store(false);
}
// Returns a uniquely-owned MlockPinner.
// NOTE(review): defined out of line; presumably the pinner is bound to this
// StagingBuffer -- confirm against the implementation file.
std::unique_ptr<MlockPinner> makeMlockPinner();
// Returns a uniquely-owned IoUringPinner for the given io_uring instance.
// NOTE(review): defined out of line; presumably the pinner is bound to this
// StagingBuffer, and whether `ring` may be null is not visible here --
// confirm against the implementation file.
std::unique_ptr<IoUringPinner> makeIoUringPinner(struct io_uring* ring);
/** EXPLANATION:
* Returns an iovec for io_uring registration.
* The buffer is mmap()-allocated and suitable for IORING_REGISTER_BUFFERS.
@@ -144,6 +206,7 @@ public:
private:
void computeSlotStrideAndBufferSize();
void assertUnpinnedAndMarkPinned(PinningMechanism mechanism);
static size_t calculateFirstSlotOffsetAndValidate(
uint8_t* buffer,
size_t bufferNBytes,
@@ -163,7 +226,6 @@ private:
{
if (ptr != nullptr && size > 0)
{
munlock(ptr, size);
munmap(ptr, size);
}
}
@@ -173,14 +235,14 @@ private:
// Using unique_ptr<uint8_t, MmapDeleter> instead of array syntax
// since we have a custom deleter that knows the size
std::unique_ptr<uint8_t, MmapDeleter> buffer;
size_t bufferNBytes;
size_t bufferNBytes = 0;
// Layout/invariants
size_t nSlots;
size_t nSlots = 0;
public:
size_t slotStrideNBytes;
size_t firstSlotOffsetNBytes; // offset from buffer start to first slot
size_t slotStrideNBytes = 0;
size_t firstSlotOffsetNBytes = 0; // offset from buffer start to first slot
private:
IOEngineConstraints inputConstraints;
@@ -189,8 +251,13 @@ private:
mutable std::shared_ptr<FrameAssemblyDesc> frameDesc;
// Current state
std::atomic<size_t> currentNBytes;
std::atomic<bool> assemblingFlag;
std::atomic<size_t> currentNBytes{0};
std::atomic<bool> assemblingFlag{false};
bool currentlyPinned = false;
PinningMechanism currentPinningMechanism = PinningMechanism::NONE;
friend class MlockPinner;
friend class IoUringPinner;
};
} // namespace stim_buff