From 2e75dd40aa5a12c8f9cd2dac801c1d001959b26f Mon Sep 17 00:00:00 2001 From: Hayodea Hekol Date: Fri, 14 Nov 2025 18:01:48 -0400 Subject: [PATCH] OClCollMeshEngn: Rearrange steps in startCollateKernel Just to make it match startCompactKernel. No other reason. --- .../openClCollatingAndMeshingEngine.cpp | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp index a2ed8fc..3246b21 100644 --- a/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp +++ b/stimBuffApis/livoxGen1/openClCollatingAndMeshingEngine.cpp @@ -424,21 +424,6 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( // Store the caller's callback collateKernelCb = std::move(callback); - /** EXPLANATION: - * It shouldn't be necessary to map the assembly/collation buffers here - * since we don't need to read/write them on the host CPUs (unless we're - * intervening to debug; in which case we should map them as CL_MAP_READ). - * - * Otherwise, the foreign GPU's view of the data in the assembly buffer - * is currently up to date; and the collation buffer's state is undefined... - * and also irrelevant since it's only going to be used for output anyway. - */ - - mapAssemblyBuffer(CL_MAP_WRITE_INVALIDATE_REGION); - unmapAssemblyBuffer(); - mapCollationBuffer(CL_MAP_WRITE); - unmapCollationBuffer(); - // Validate buffers callable auto validateBuffers = [this, &assemblyBuff, &collationBuff]() { struct iovec assemblyIov = assemblyBuff.getClEngineIovec(); @@ -458,6 +443,31 @@ bool OpenClCollatingAndMeshingEngine::startCollateKernel( return setupCollateDgramsArgs(assemblyBuff); }; + /** EXPLANATION: + * It shouldn't be necessary to map the assembly/collation buffers here + * since we don't need to read/write them on the host CPUs (unless we're + * intervening to debug; in which case we should map them as CL_MAP_READ). 
+ * + * Otherwise, the foreign GPU's view of the data in the assembly buffer + * is currently up to date; and the collation buffer's state is undefined... + * and also irrelevant since it's only going to be used for output anyway. + */ + + if (!mapAssemblyBuffer(CL_MAP_WRITE_INVALIDATE_REGION)) + { + std::cerr << __func__ << ": failed to map assembly buffer" << std::endl; + return false; + } + + unmapAssemblyBuffer(); + if (!mapCollationBuffer(CL_MAP_WRITE)) + { + std::cerr << __func__ << ": failed to map collation buffer" << std::endl; + return false; + } + + unmapCollationBuffer(); + // Calculate global work size (just num slots in the frame) size_t globalWorkSize = static_cast(frameAssemblyDesc->numSlots);