OClCollMeshEngn: perf profile and print kernel exec durations
This commit is contained in:
@@ -766,6 +766,9 @@ public:
|
||||
void compactCollateAndMeshFrameReq1_doCompact_posted(
|
||||
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
||||
{
|
||||
// Record compact kernel start time
|
||||
engine.compactKernelStartTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
bool success = engine.startCompactKernel(
|
||||
engine.parent.assemblyBuffer,
|
||||
static_cast<uint32_t>(context->frameAssemblyResult.nSucceeded.load()),
|
||||
@@ -788,6 +791,9 @@ public:
|
||||
cl_int compactStatus)
|
||||
{
|
||||
engine.stopCompactKernel();
|
||||
// Record compact kernel end time
|
||||
engine.compactKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// If compact failed, call callback directly with failure
|
||||
if (compactStatus != CL_SUCCESS)
|
||||
{
|
||||
@@ -811,6 +817,9 @@ public:
|
||||
void compactCollateAndMeshFrameReq3_doCollate_posted(
|
||||
std::shared_ptr<CompactCollateAndMeshFrameReq> context)
|
||||
{
|
||||
// Record collate kernel start time
|
||||
engine.collateKernelStartTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
bool success = engine.startCollateKernel(
|
||||
engine.parent.assemblyBuffer, engine.parent.collationBuffer,
|
||||
std::bind(
|
||||
@@ -832,6 +841,9 @@ public:
|
||||
cl_int collateStatus)
|
||||
{
|
||||
engine.stopCollateKernel();
|
||||
// Record collate kernel end time
|
||||
engine.collateKernelEndTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
bool success = (collateStatus == CL_SUCCESS);
|
||||
|
||||
// Early callback + return pattern
|
||||
@@ -901,5 +913,25 @@ void OpenClCollatingAndMeshingEngine::compactCollateAndMeshFrameReq(
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCompactKernelDuration() const
|
||||
{
|
||||
auto duration = compactKernelEndTime - compactKernelStartTime;
|
||||
if (duration.count() < 0)
|
||||
{
|
||||
return std::chrono::milliseconds(0);
|
||||
}
|
||||
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
||||
}
|
||||
|
||||
std::chrono::milliseconds OpenClCollatingAndMeshingEngine::getCollateKernelDuration() const
|
||||
{
|
||||
auto duration = collateKernelEndTime - collateKernelStartTime;
|
||||
if (duration.count() < 0)
|
||||
{
|
||||
return std::chrono::milliseconds(0);
|
||||
}
|
||||
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
|
||||
}
|
||||
|
||||
} // namespace stim_buff
|
||||
} // namespace smo
|
||||
|
||||
Reference in New Issue
Block a user