OClCollMeshEngn: perf profile and print kernel exec durations

This commit is contained in:
2025-11-12 13:03:39 -04:00
parent 33b534355a
commit d87c71b794
3 changed files with 49 additions and 0 deletions
@@ -8,6 +8,7 @@
#include <functional>
#include <iostream>
#include <stdexcept>
#include <chrono>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <asynchronousLoop.h>
@@ -61,6 +62,10 @@ public:
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
Callback<compactCollateAndMeshFrameReqCbFn> callback);
// Get kernel execution durations in milliseconds.
// One accessor per kernel (compact, collate); both are const-qualified and
// return a std::chrono::milliseconds value, so sub-millisecond precision is
// not representable in the result.
// NOTE(review): the definitions are not visible here -- presumably each is
// (matching *KernelEndTime - *KernelStartTime) converted with duration_cast;
// confirm in the implementation file, including what is returned before a
// kernel has completed.
std::chrono::milliseconds getCompactKernelDuration() const;
std::chrono::milliseconds getCollateKernelDuration() const;
private:
// Reference member: it must be bound in every constructor's initializer list
// and cannot be reseated afterwards; it also makes the implicitly-declared
// copy/move assignment operators deleted.
// NOTE(review): presumably the referenced PcloudStimulusBuffer owns this
// object and therefore outlives it -- confirm at the construction site.
PcloudStimulusBuffer& parent;
@@ -103,6 +108,12 @@ private:
// Stored callback members, one of each callback type.
// NOTE(review): compactKernelCbFn / collateKernelCbFn are declared outside
// this view; going by the names these are invoked when the compact / collate
// kernel finishes -- confirm against the type definitions and call sites.
compactKernelCbFn compactKernelCb;
collateKernelCbFn collateKernelCb;
// Timestamp tracking for kernel execution: one start/end time_point pair per
// kernel (compact, collate), all taken from std::chrono::high_resolution_clock.
// No in-class initializers are given here, so unless a constructor sets them
// they hold the default time_point value (the clock's epoch).
// NOTE(review): high_resolution_clock is not guaranteed to be steady (it may
// alias system_clock); std::chrono::steady_clock is the usual choice for
// measuring intervals -- consider switching the members and the code that
// assigns them together.
// NOTE(review): if these are written from an OpenCL callback thread and read
// from another thread via the duration getters, the accesses need
// synchronization -- confirm the threading model.
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
// Static callbacks for OpenCL events.
// Declared static with the CL_CALLBACK calling convention and the parameter
// list (cl_event, cl_int status, void* user_data), which is the shape of the
// pfn_notify function pointer accepted by clSetEventCallback.
//   event                     - the event the notification is for
//   event_command_exec_status - execution status reported for that event
//   user_data                 - opaque pointer supplied at registration
// NOTE(review): presumably user_data carries the engine instance so the
// static function can reach member state -- confirm at the registration site.
static void CL_CALLBACK compactKernelEventCallback(
cl_event event, cl_int event_command_exec_status, void* user_data);