PcloudStimBuff,IoUringAssmEngn: add frame assembly perf profiling

We now time the frame assembly sequence.
This commit is contained in:
2025-11-20 03:26:43 -04:00
parent 9e64c510cc
commit 9ce1ced92d
3 changed files with 33 additions and 8 deletions
@@ -432,6 +432,9 @@ public:
// Initialize loop with number of slots
context->loop = AsynchronousLoop(engine.frameAssemblyDesc->numSlots);
// Record assembly start time
engine.assemblyStartTime = std::chrono::high_resolution_clock::now();
/** FIXME:
* I'm suspicious of this std::bind return object here. What if us
* setting it to null inside of stop() doesn't actually cause the
@@ -528,6 +531,10 @@ public:
*/
// Ensure we only execute once using atomic exchange
if (context->handlerExecuted.exchange(true)) { return; }
// Record assembly end time
context->engine.assemblyEndTime =
std::chrono::high_resolution_clock::now();
// Cancel the timer, stop the engine and process frame, if any.
context->engine.assemblyCycleComplete();
@@ -851,5 +858,15 @@ void IoUringAssemblyEngine::printSlotBytes(size_t slotIndex, size_t nBytes)
}
}
/**
 * Reports the span between the recorded assembly start and end time points.
 *
 * The difference `assemblyEndTime - assemblyStartTime` is converted to whole
 * milliseconds with `duration_cast`, which truncates any sub-millisecond
 * remainder toward zero.
 *
 * @return The elapsed time in milliseconds, or zero milliseconds when the
 *         end time point lies before the start time point (i.e. the raw
 *         difference is negative).
 */
std::chrono::milliseconds IoUringAssemblyEngine::getAssemblyDuration() const
{
	using std::chrono::milliseconds;

	const auto elapsed = assemblyEndTime - assemblyStartTime;

	// A negative span is reported as zero rather than as a negative duration.
	return (elapsed.count() < 0)
		? milliseconds(0)
		: std::chrono::duration_cast<milliseconds>(elapsed);
}
} // namespace stim_buff
} // namespace smo
@@ -45,6 +45,9 @@ public:
/**
 * Decides whether compaction is needed for the given counts.
 *
 * @param nSucceeded Count compared against nTotal (presumably the number of
 *                   successfully assembled slots; confirm against callers).
 * @param nTotal     Total count (presumably the number of slots; confirm).
 * @return true only when both counts are non-zero and they differ;
 *         false if either count is zero or the two are equal.
 */
static bool compactionIsNeeded(uint32_t nSucceeded, uint32_t nTotal)
{
	// Either count being zero rules compaction out immediately.
	if (nSucceeded == 0 || nTotal == 0) { return false; }

	return nSucceeded != nTotal;
}
// Get assembly execution duration in milliseconds.
// Computed as assemblyEndTime - assemblyStartTime, truncated to whole
// milliseconds; a negative difference is reported as 0 ms.
std::chrono::milliseconds getAssemblyDuration() const;
private:
typedef std::function<void(void*, int)> resetAndAssembleFrameCbFn;
void resetAndAssembleFrame(resetAndAssembleFrameCbFn onCqeReady);
@@ -94,6 +97,10 @@ private:
std::random_device randomDevice;
std::mt19937 randomGenerator;
// Timestamp tracking for assembly execution.
// getAssemblyDuration() reports (assemblyEndTime - assemblyStartTime).
// Both are default-constructed until the assembly code path assigns them.
std::chrono::high_resolution_clock::time_point assemblyStartTime;
std::chrono::high_resolution_clock::time_point assemblyEndTime;
void fillUnAssembledSlotsWithDummyDgrams();
void randomDummySlotFiller(AsynchronousLoop& loop);
void onEventfdRead(
@@ -413,16 +413,17 @@ public:
if (!success) {
std::cerr << __func__ << ": Failed to compact and collate frame" << std::endl;
} else {
std::cout << __func__ << ": Successfully compacted and collated frame" << std::endl;
} else
{
// Print execution durations
auto assemblyDuration = pcloudProducer.ioUringAssemblyEngine.getAssemblyDuration();
auto compactDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCompactKernelDuration();
auto collateDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCollateKernelDuration();
std::cout << __func__ << ": Successfully compacted and collated frame: assemblyDuration=" << assemblyDuration.count()
<< "ms, compactKernelDuration=" << compactDuration.count()
<< "ms, collateKernelDuration=" << collateDuration.count() << "ms" << std::endl;
}
// Print kernel execution durations
auto compactDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCompactKernelDuration();
auto collateDuration = pcloudProducer.openClCollatingAndMeshingEngine.getCollateKernelDuration();
std::cout << __func__ << ": compactKernelDuration=" << compactDuration.count()
<< "ms, collateKernelDuration=" << collateDuration.count() << "ms" << std::endl;
callOriginalCallback();
}
};