OClCollMeshEngn: perf profile and print kernel exec durations
This commit is contained in:
@@ -766,6 +766,9 @@ public:
|
|||||||
void compactCollateAndMeshFrameReq1_doCompact_posted(
|
void compactCollateAndMeshFrameReq1_doCompact_posted(
|
||||||
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
||||||
{
|
{
|
||||||
|
// Record compact kernel start time
|
||||||
|
engine.compactKernelStartTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
bool success = engine.startCompactKernel(
|
bool success = engine.startCompactKernel(
|
||||||
engine.parent.assemblyBuffer,
|
engine.parent.assemblyBuffer,
|
||||||
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
|
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
|
||||||
@@ -788,6 +791,9 @@ public:
|
|||||||
cl_int compactStatus)
|
cl_int compactStatus)
|
||||||
{
|
{
|
||||||
engine.stopCompactKernel();
|
engine.stopCompactKernel();
|
||||||
|
// Record compact kernel end time
|
||||||
|
engine.compactKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// If compact failed, call callback directly with failure
|
// If compact failed, call callback directly with failure
|
||||||
if (compactStatus != CL_SUCCESS)
|
if (compactStatus != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
@@ -811,6 +817,9 @@ public:
|
|||||||
void compactCollateAndMeshFrameReq3_doCollate_posted(
|
void compactCollateAndMeshFrameReq3_doCollate_posted(
|
||||||
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
||||||
{
|
{
|
||||||
|
// Record collate kernel start time
|
||||||
|
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
bool success = engine.startCollateKernel(
|
bool success = engine.startCollateKernel(
|
||||||
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
||||||
std::bind(
|
std::bind(
|
||||||
@@ -832,6 +841,9 @@ public:
|
|||||||
cl_int collateStatus)
|
cl_int collateStatus)
|
||||||
{
|
{
|
||||||
engine.stopCollateKernel();
|
engine.stopCollateKernel();
|
||||||
|
// Record collate kernel end time
|
||||||
|
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
bool success = (collateStatus == CL_SUCCESS);
|
bool success = (collateStatus == CL_SUCCESS);
|
||||||
|
|
||||||
// Early callback + return pattern
|
// Early callback + return pattern
|
||||||
@@ -901,5 +913,25 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCompactKernelDuration() const
|
||||||
|
{
|
||||||
|
auto duration = compactKernelEndTime - compactKernelStartTime;
|
||||||
|
if (duration.count() < 0)
|
||||||
|
{
|
||||||
|
return std::chrono::milliseconds(0);
|
||||||
|
}
|
||||||
|
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCollateKernelDuration() const
|
||||||
|
{
|
||||||
|
auto duration = collateKernelEndTime - collateKernelStartTime;
|
||||||
|
if (duration.count() < 0)
|
||||||
|
{
|
||||||
|
return std::chrono::milliseconds(0);
|
||||||
|
}
|
||||||
|
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace stim_buff
|
} // namespace stim_buff
|
||||||
} // namespace smo
|
} // namespace smo
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <chrono>
|
||||||
#define CL_TARGET_OPENCL_VERSION 120
|
#define CL_TARGET_OPENCL_VERSION 120
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <asynchronousLoop.h>
|
#include <asynchronousLoop.h>
|
||||||
@@ -61,6 +62,10 @@ public:
|
|||||||
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
AsynchronousLoop& asyncLoop, StimulusFrame& stimulusFrame,
|
||||||
Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
Callback<compactCollateAndMeshFrameReqCbFn> callback);
|
||||||
|
|
||||||
|
// Get kernel execution durations in milliseconds
|
||||||
|
std::chrono::milliseconds getCompactKernelDuration() const;
|
||||||
|
std::chrono::milliseconds getCollateKernelDuration() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
PcloudStimulusBuffer& parent;
|
PcloudStimulusBuffer& parent;
|
||||||
|
|
||||||
@@ -103,6 +108,12 @@ private:
|
|||||||
compactKernelCbFn compactKernelCb;
|
compactKernelCbFn compactKernelCb;
|
||||||
collateKernelCbFn collateKernelCb;
|
collateKernelCbFn collateKernelCb;
|
||||||
|
|
||||||
|
// Timestamp tracking for kernel execution
|
||||||
|
std::chrono::high_resolution_clock::time_point compactKernelStartTime;
|
||||||
|
std::chrono::high_resolution_clock::time_point compactKernelEndTime;
|
||||||
|
std::chrono::high_resolution_clock::time_point collateKernelStartTime;
|
||||||
|
std::chrono::high_resolution_clock::time_point collateKernelEndTime;
|
||||||
|
|
||||||
// Static callbacks for OpenCL events
|
// Static callbacks for OpenCL events
|
||||||
static void CL_CALLBACK compactKernelEventCallback(
|
static void CL_CALLBACK compactKernelEventCallback(
|
||||||
cl_event event, cl_int event_command_exec_status, void* user_data);
|
cl_event event, cl_int event_command_exec_status, void* user_data);
|
||||||
|
|||||||
@@ -164,6 +164,12 @@ public:
|
|||||||
std::cout << __func__ << ": Successfully compacted and collated frame" << std::endl;
|
std::cout << __func__ << ": Successfully compacted and collated frame" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print kernel execution durations
|
||||||
|
auto compactDuration = stimBuff.openClCollatingAndMeshingEngine.getCompactKernelDuration();
|
||||||
|
auto collateDuration = stimBuff.openClCollatingAndMeshingEngine.getCollateKernelDuration();
|
||||||
|
std::cout << __func__ << ": compactKernelDuration=" << compactDuration.count()
|
||||||
|
<< "ms, collateKernelDuration=" << collateDuration.count() << "ms" << std::endl;
|
||||||
|
|
||||||
callOriginalCallback();
|
callOriginalCallback();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user