From d87c71b79455b64f7c9565cf6d322af2a309935b Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Wed, 12 Nov 2025 13:03:39 -0400 Subject: [PATCH] OClCollMeshEngn: perf profile and print kernel exec durations --- .../openClCollatingAndMeshingEngine.cpp | 32 +++++++++++++++++++ .../openClCollatingAndMeshingEngine.h | 11 +++++++ .../livoxGen1/pcloudStimulusBuffer.cpp | 6 ++++ 3 files changed, 49 insertions(+) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index 3ac5a66..fbc37f3 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -766,6 +766,9 @@ public: void compactCollateAndMeshFrameReq1_doCompact_posted( std::shared_ptr context) { + // Record compact kernel start time + engine.compactKernelStartTime = std::chrono::high_resolution_clock::now(); + bool success = engine.startCompactKernel( engine.parent.assemblyBuffer, static_cast(context->frameAssemblyResult.nSucceeded.load()), @@ -788,6 +791,9 @@ public: cl_int compactStatus) { engine.stopCompactKernel(); + // Record compact kernel end time + engine.compactKernelEndTime = std::chrono::high_resolution_clock::now(); + // If compact failed, call callback directly with failure if (compactStatus != CL_SUCCESS) { @@ -811,6 +817,9 @@ public: void compactCollateAndMeshFrameReq3_doCollate_posted( std::shared_ptr context) { + // Record collate kernel start time + engine.collateKernelStartTime = std::chrono::high_resolution_clock::now(); + bool success = engine.startCollateKernel( engine.parent.assemblyBuffer, engine.parent.collationBuffer, std::bind( @@ -832,6 +841,9 @@ public: cl_int collateStatus) { engine.stopCollateKernel(); + // Record collate kernel end time + engine.collateKernelEndTime = std::chrono::high_resolution_clock::now(); + bool success = (collateStatus == CL_SUCCESS); // Early callback + return pattern @@ -901,5 +913,25 @@ void 
OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
 	}
 }
 
+std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCompactKernelDuration() const
+{ // Returns compactKernelEndTime - compactKernelStartTime, truncated to whole milliseconds.
+	auto duration = compactKernelEndTime - compactKernelStartTime;
+	if (duration.count() < 0) // End stamp precedes start stamp: report 0 instead of a negative span.
+	{
+		return std::chrono::milliseconds(0);
+	}
+	return std::chrono::duration_cast<std::chrono::milliseconds>(duration); // Target type must be spelled out; it cannot be deduced.
+}
+
+std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCollateKernelDuration() const
+{ // Returns collateKernelEndTime - collateKernelStartTime, truncated to whole milliseconds.
+	auto duration = collateKernelEndTime - collateKernelStartTime;
+	if (duration.count() < 0) // End stamp precedes start stamp: report 0 instead of a negative span.
+	{
+		return std::chrono::milliseconds(0);
+	}
+	return std::chrono::duration_cast<std::chrono::milliseconds>(duration); // Target type must be spelled out; it cannot be deduced.
+}
+
 } // namespace stim_buff
 } // namespace smo
diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h index e53c17a..9709a0b 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.h @@ -8,6 +8,7 @@ #include #include #include +#include #define CL_TARGET_OPENCL_VERSION 120 #include #include @@ -61,6 +62,10 @@ public: AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame, Callback callback); + // Get kernel execution durations in milliseconds + std::chrono::milliseconds getCompactKernelDuration() const; + std::chrono::milliseconds getCollateKernelDuration() const; + private: PcloudStimulusBuffer& parent; @@ -103,6 +108,12 @@ private: compactKernelCbFn compactKernelCb; collateKernelCbFn collateKernelCb; + // Timestamp tracking for kernel execution + std::chrono::high_resolution_clock::time_point compactKernelStartTime; + std::chrono::high_resolution_clock::time_point compactKernelEndTime; + std::chrono::high_resolution_clock::time_point collateKernelStartTime; + std::chrono::high_resolution_clock::time_point collateKernelEndTime; + // Static callbacks for OpenCL events static void CL_CALLBACK compactKernelEventCallback( cl_event event, cl_int event_command_exec_status, void* user_data); 
diff --git a/stimBuffApis/livoxGen1/pcloudStimulusBuffer.cpp b/stimBuffApis/livoxGen1/pcloudStimulusBuffer.cpp index 011fbe4..b3c39bf 100644 --- a/stimBuffApis/livoxGen1/pcloudStimulusBuffer.cpp +++ b/stimBuffApis/livoxGen1/pcloudStimulusBuffer.cpp @@ -164,6 +164,12 @@ public: std::cout << __func__ << ": Successfully compacted and collated frame" << std::endl; } + // Print kernel execution durations + auto compactDuration = stimBuff.openClCollatingAndMeshingEngine.getCompactKernelDuration(); + auto collateDuration = stimBuff.openClCollatingAndMeshingEngine.getCollateKernelDuration(); + std::cout << __func__ << ": compactKernelDuration=" << compactDuration.count() + << "ms, collateKernelDuration=" << collateDuration.count() << "ms" << std::endl; + callOriginalCallback(); } };