1d64ce0c7e
We use io_uring_register_buffers() for IoUringAssemblyEngine instead of using mlock(). This __appears__ to have reduced CPU utilization on the Dell laptop. Could also be that we recently upgraded total RAM from 8GiB to 32GiB.
267 lines
7.0 KiB
C++
267 lines
7.0 KiB
C++
#ifndef STAGINGBUFFER_H
|
|
#define STAGINGBUFFER_H
|
|
|
|
#include <memory>
|
|
#include <cstdint>
|
|
#include <atomic>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <sys/mman.h>
|
|
#include <sys/uio.h>
|
|
|
|
namespace smo {
|
|
namespace stim_buff {
|
|
|
|
// Forward declaration
|
|
class FrameAssemblyDesc;
|
|
|
|
} // namespace stim_buff
|
|
} // namespace smo
|
|
|
|
struct io_uring;
|
|
|
|
namespace smo {
|
|
namespace stim_buff {
|
|
|
|
/**
 * StagingBuffer manages a large buffer that guides io_uring in assembling a
 * number of Livox Avia point-cloud UDP datagrams into a single stim frame.
 *
 * The buffer operates in a cycle:
 * 1. io_uring assembles UDP datagrams into the buffer until it is full.
 * 2. The buffer is handed off to the stimbuff layer, which appends the
 *    assembled frame to the stimbuff.
 * 3. Once the stimbuff layer has appended the assembled frame, the staging
 *    buffer is reset and the cycle repeats.
 */
|
|
class StagingBuffer
|
|
{
|
|
public:
|
|
/**
 * Names the mechanism used to pin the staging buffer.
 *
 * Used as the type of StagingBuffer::currentPinningMechanism and as the
 * argument of assertUnpinnedAndMarkPinned().
 */
enum class PinningMechanism
{
    NONE,     ///< Initial value of currentPinningMechanism.
    MLOCK,    ///< Presumably corresponds to MlockPinner -- confirm in the .cpp.
    IO_URING  ///< Presumably corresponds to IoUringPinner -- confirm in the .cpp.
};
|
|
|
|
/**
 * Four size values describing the alignment and padding an I/O engine
 * requires for slots and frames.
 *
 * NOTE(review): the precise interpretation of each value (e.g. whether the
 * alignment values must be powers of two, or whether 0 means "no
 * constraint") is decided by the code that consumes them -- confirm there.
 */
class IOEngineConstraints
{
public:
    /**
     * Creates constraints with every value set to 0.
     *
     * The members carry default initializers so that a default-constructed
     * object (including the one embedded in a default-constructed
     * StagingBuffer) never holds indeterminate values; stringify() is
     * therefore always safe to call.
     */
    IOEngineConstraints() = default;

    /**
     * Creates constraints from explicit values.
     *
     * @param slotStartAlignmentByteVal_   stored in slotStartAlignmentByteVal
     * @param slotPadToNBytes_             stored in slotPadToNBytes
     * @param frameStartAlignmentByteVal_  stored in frameStartAlignmentByteVal
     * @param framePadToNBytes_            stored in framePadToNBytes
     */
    IOEngineConstraints(
        size_t slotStartAlignmentByteVal_,
        size_t slotPadToNBytes_,
        size_t frameStartAlignmentByteVal_,
        size_t framePadToNBytes_)
    : slotStartAlignmentByteVal(slotStartAlignmentByteVal_),
      slotPadToNBytes(slotPadToNBytes_),
      frameStartAlignmentByteVal(frameStartAlignmentByteVal_),
      framePadToNBytes(framePadToNBytes_)
    {}

    ~IOEngineConstraints() = default;

    // Declaration order matters: it is the member-initialization order used
    // by the four-argument constructor above.
    size_t slotStartAlignmentByteVal = 0;
    size_t slotPadToNBytes = 0;
    size_t frameStartAlignmentByteVal = 0;
    size_t framePadToNBytes = 0;

    // Static defaults for io_uring and OpenCL (declared here only; the
    // definitions are not part of this header).
    static const IOEngineConstraints ioUringConstraints;
    static const IOEngineConstraints openClInputConstraints;

    /**
     * Formats the four values for logging.
     *
     * @return "IOEngineConstraints{name=value, ...}" listing the members in
     *         declaration order.
     */
    inline std::string stringify() const
    {
        std::ostringstream text;
        text << "IOEngineConstraints{";
        text << "slotStartAlignmentByteVal=" << slotStartAlignmentByteVal;
        text << ", slotPadToNBytes=" << slotPadToNBytes;
        text << ", frameStartAlignmentByteVal=" << frameStartAlignmentByteVal;
        text << ", framePadToNBytes=" << framePadToNBytes;
        text << "}";
        return text.str();
    }
};
|
|
|
|
public:
|
|
/** EXPLANATION:
|
|
* Default constructor creates uninitialized buffer.
|
|
* Must be properly initialized using placement new with the parameterized constructor.
|
|
*/
|
|
StagingBuffer() = default;
|
|
|
|
/** EXPLANATION:
|
|
* We use the input and output engine constraints to determine the total
|
|
* amount of memory required internally to assemble a single frame with
|
|
* the given number of points per frame.
|
|
*/
|
|
explicit StagingBuffer(
|
|
const IOEngineConstraints& inputEngineConstraints,
|
|
const IOEngineConstraints& outputEngineConstraints,
|
|
size_t nSlots);
|
|
~StagingBuffer();
|
|
|
|
// Non-copyable, movable
|
|
StagingBuffer(const StagingBuffer&) = delete;
|
|
StagingBuffer& operator=(const StagingBuffer&) = delete;
|
|
StagingBuffer(StagingBuffer&&) = default;
|
|
StagingBuffer& operator=(StagingBuffer&&) = default;
|
|
|
|
class Pinner
|
|
{
|
|
public:
|
|
Pinner(const Pinner&) = delete;
|
|
Pinner& operator=(const Pinner&) = delete;
|
|
Pinner(Pinner&&) = delete;
|
|
Pinner& operator=(Pinner&&) = delete;
|
|
|
|
protected:
|
|
explicit Pinner(StagingBuffer& parent_);
|
|
~Pinner() = default;
|
|
|
|
StagingBuffer& parent;
|
|
};
|
|
|
|
class MlockPinner
|
|
: public Pinner
|
|
{
|
|
public:
|
|
explicit MlockPinner(StagingBuffer& parent);
|
|
~MlockPinner();
|
|
|
|
MlockPinner(const MlockPinner&) = delete;
|
|
MlockPinner& operator=(const MlockPinner&) = delete;
|
|
MlockPinner(MlockPinner&&) = delete;
|
|
MlockPinner& operator=(MlockPinner&&) = delete;
|
|
};
|
|
|
|
class IoUringPinner
|
|
: public Pinner
|
|
{
|
|
public:
|
|
IoUringPinner(StagingBuffer& parent, struct io_uring* ring);
|
|
~IoUringPinner();
|
|
|
|
IoUringPinner(const IoUringPinner&) = delete;
|
|
IoUringPinner& operator=(const IoUringPinner&) = delete;
|
|
IoUringPinner(IoUringPinner&&) = delete;
|
|
IoUringPinner& operator=(IoUringPinner&&) = delete;
|
|
|
|
private:
|
|
struct io_uring* ring;
|
|
};
|
|
|
|
public:
|
|
/** EXPLANATION:
|
|
* Returns an input-engine-agnostic descriptor describing per-frame packet
|
|
* slot layout. Different input engines should be able to convert this into
|
|
* engine-specific metadata. E.g: io_uring's SQE descriptor.
|
|
*/
|
|
operator std::shared_ptr<FrameAssemblyDesc>() const { return frameDesc; }
|
|
// operator OpenClSharedBufferDescriptor() const;
|
|
|
|
/** @return current value of the atomic assembling flag. */
bool isAssembling() const
{
    return assemblingFlag.load();
}

/** Sets the atomic assembling flag to true. */
void startAssembly()
{
    assemblingFlag.store(true);
}

/** Sets the atomic assembling flag to false. */
void stopAssembly()
{
    assemblingFlag.store(false);
}
|
|
|
|
std::unique_ptr<MlockPinner> makeMlockPinner();
|
|
std::unique_ptr<IoUringPinner> makeIoUringPinner(struct io_uring* ring);
|
|
|
|
/** EXPLANATION:
|
|
* Returns an iovec for io_uring registration.
|
|
* The buffer is mmap()-allocated and suitable for IORING_REGISTER_BUFFERS.
|
|
*/
|
|
struct iovec getIoUringRegisterIoVec() const
|
|
{
|
|
struct iovec iov;
|
|
iov.iov_base = buffer.get();
|
|
iov.iov_len = bufferNBytes;
|
|
return iov;
|
|
}
|
|
|
|
/** EXPLANATION:
|
|
* Returns an iovec for OpenCL engine buffer access.
|
|
* The buffer is mmap()-allocated and suitable for CL_MEM_USE_HOST_PTR.
|
|
* Returns pointer to first slot (offset by firstSlotOffsetNBytes) and
|
|
* size from first slot to end of buffer.
|
|
*/
|
|
struct iovec getClEngineIovec() const
|
|
{
|
|
struct iovec iov;
|
|
iov.iov_base = buffer.get() + firstSlotOffsetNBytes;
|
|
iov.iov_len = bufferNBytes - firstSlotOffsetNBytes;
|
|
return iov;
|
|
}
|
|
|
|
inline std::string stringify() const
|
|
{
|
|
std::ostringstream oss;
|
|
oss << "StagingBuffer{"
|
|
<< "nSlots=" << nSlots
|
|
<< ", bufferNBytes=" << bufferNBytes
|
|
<< ", slotStrideNBytes=" << slotStrideNBytes
|
|
<< ", constraints=" << inputConstraints.stringify()
|
|
<< "}";
|
|
return oss.str();
|
|
}
|
|
|
|
private:
|
|
void computeSlotStrideAndBufferSize();
|
|
void assertUnpinnedAndMarkPinned(PinningMechanism mechanism);
|
|
static size_t calculateFirstSlotOffsetAndValidate(
|
|
uint8_t* buffer,
|
|
size_t bufferNBytes,
|
|
size_t nSlots,
|
|
size_t slotStrideNBytes,
|
|
const IOEngineConstraints& inputConstraints);
|
|
|
|
// Custom deleter for mmap-allocated buffer.
// Remembers the mapping length because munmap() needs it.
struct MmapDeleter
{
    // Length in bytes of the mapping to release; 0 means "nothing to unmap".
    size_t size;

    // Default constructor for use with default-constructed StagingBuffer
    MmapDeleter() : size(0) {}

    // Implicit conversion from a length is kept as-is so existing call
    // sites that pass a bare size keep compiling.
    MmapDeleter(size_t s) : size(s) {}

    // Unmaps `size` bytes starting at `ptr`. Does nothing for a null
    // pointer or a zero length. The munmap() result is not inspected.
    void operator()(uint8_t* ptr) const
    {
        if (ptr == nullptr || size == 0)
        {
            return;
        }
        munmap(ptr, size);
    }
};
|
|
|
|
// Buffer data - mmap-allocated for io_uring registration
|
|
// Using unique_ptr<uint8_t, MmapDeleter> instead of array syntax
|
|
// since we have a custom deleter that knows the size
|
|
std::unique_ptr<uint8_t, MmapDeleter> buffer;
|
|
size_t bufferNBytes = 0;
|
|
|
|
// Layout/invariants
|
|
size_t nSlots = 0;
|
|
|
|
public:
|
|
size_t slotStrideNBytes = 0;
|
|
size_t firstSlotOffsetNBytes = 0; // offset from buffer start to first slot
|
|
|
|
private:
|
|
IOEngineConstraints inputConstraints;
|
|
|
|
// Descriptor (computed once; reused across frames)
|
|
mutable std::shared_ptr<FrameAssemblyDesc> frameDesc;
|
|
|
|
// Current state
|
|
std::atomic<size_t> currentNBytes{0};
|
|
std::atomic<bool> assemblingFlag{false};
|
|
bool currentlyPinned = false;
|
|
PinningMechanism currentPinningMechanism = PinningMechanism::NONE;
|
|
|
|
friend class MlockPinner;
|
|
friend class IoUringPinner;
|
|
};
|
|
|
|
} // namespace stim_buff
|
|
} // namespace smo
|
|
|
|
#endif // STAGINGBUFFER_H
|