Rename buildmach stuff to buildmach/
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
# Build rules for the stand-alone probing / validation tools in this directory.
#   COMPILE_CL_CHECKS  - small OpenCL programs that probe host/device memory sharing.
#   COMPILE_PCL_TOOLS  - PCL-based tools (built only when PCL is actually found).
option(COMPILE_CL_CHECKS "Compile CL checks" OFF)
option(COMPILE_PCL_TOOLS "Compile PCL-based validation tools" ON)

if(COMPILE_CL_CHECKS)
    # Find OpenCL: try find_package first, fall back to pkg-config
    find_package(OpenCL QUIET)
    if(OpenCL_FOUND)
        # Normalize find_package variables to match pkg_check_modules naming
        set(OPENCL_FOUND TRUE)
        set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS})
        # Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural)
        if(OpenCL_LIBRARIES)
            set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
        else()
            set(OPENCL_LIBRARIES ${OpenCL_LIBRARY})
        endif()
        set(OPENCL_LIBRARY_DIRS "")
        message(STATUS "Found OpenCL using find_package")
    else()
        # Fall back to pkg-config. pkg_check_modules() is only defined once the
        # PkgConfig module has been loaded, so load it here instead of relying
        # on some other CMakeLists having done it already.
        find_package(PkgConfig QUIET)
        if(PKG_CONFIG_FOUND)
            pkg_check_modules(OPENCL OpenCL)
        endif()
        if(NOT OPENCL_FOUND)
            message(FATAL_ERROR
                "Failed to find OpenCL: both find_package and "
                "pkg_check_modules failed. Try installing the "
                "'ocl-icd-opencl-dev' package (or the appropriate "
                "OpenCL development package for your system)."
            )
        endif()
        message(STATUS "Found OpenCL using pkg-config")
    endif()

    # Every check is a single-source executable with identical OpenCL settings.
    foreach(cl_check IN ITEMS
            clhostshmemptrcheck
            clshmemlatency
            clshmemlatency_callback
            clshmemcheck
            clzerocopycheck)
        add_executable(${cl_check} ${cl_check}.cpp)
        target_include_directories(${cl_check}
            PUBLIC ${OPENCL_INCLUDE_DIRS})
        # pkg-config reports bare library names; the matching search path is
        # needed when OpenCL lives outside the default linker directories.
        if(OPENCL_LIBRARY_DIRS)
            target_link_directories(${cl_check}
                PUBLIC ${OPENCL_LIBRARY_DIRS})
        endif()
        target_link_libraries(${cl_check}
            ${OPENCL_LIBRARIES})
    endforeach()
endif()

if(COMPILE_PCL_TOOLS)
    enable_language(C)
    find_package(MPI REQUIRED COMPONENTS C)
    find_package(PCL QUIET COMPONENTS common io surface features search kdtree)
    if(PCL_FOUND)
        add_executable(meshFromPcd meshFromPcd.cpp)
        target_include_directories(meshFromPcd PUBLIC ${PCL_INCLUDE_DIRS})
        target_link_directories(meshFromPcd PUBLIC ${PCL_LIBRARY_DIRS})
        target_link_libraries(meshFromPcd ${PCL_LIBRARIES})
        target_compile_options(meshFromPcd PRIVATE ${PCL_DEFINITIONS})
    else()
        # PCL is optional: warn and carry on rather than failing the configure step.
        message(WARNING "PCL not found; skipping meshFromPcd build")
    endif()
endif()
|
||||
@@ -0,0 +1,125 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
// Translate the OpenCL status codes this tool reports into their symbolic
// names. Any code that is not listed yields "UNKNOWN_ERROR".
static const char* clErrorToStr(cl_int err)
{
    if (err == CL_SUCCESS)                       return "CL_SUCCESS";
    if (err == CL_INVALID_VALUE)                 return "CL_INVALID_VALUE";
    if (err == CL_INVALID_CONTEXT)               return "CL_INVALID_CONTEXT";
    if (err == CL_INVALID_MEM_OBJECT)            return "CL_INVALID_MEM_OBJECT";
    if (err == CL_OUT_OF_HOST_MEMORY)            return "CL_OUT_OF_HOST_MEMORY";
    if (err == CL_INVALID_OPERATION)             return "CL_INVALID_OPERATION";
    if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE) return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
    return "UNKNOWN_ERROR";
}
|
||||
|
||||
// Try creating a USE_HOST_PTR buffer on this device
|
||||
bool testUseHostPtr(cl_context ctx, cl_device_id dev)
|
||||
{
|
||||
const size_t bufSize = 1024;
|
||||
std::vector<char> host(bufSize, 0);
|
||||
|
||||
cl_int err = 0;
|
||||
cl_mem buf = clCreateBuffer(
|
||||
ctx,
|
||||
CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
|
||||
bufSize,
|
||||
host.data(),
|
||||
&err
|
||||
);
|
||||
|
||||
if(err != CL_SUCCESS) {
|
||||
std::cerr << " clCreateBuffer(CL_MEM_USE_HOST_PTR) failed: "
|
||||
<< clErrorToStr(err) << "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to enqueue a trivial write to verify it works
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
|
||||
if(err != CL_SUCCESS){
|
||||
std::cerr << " Failed to create command queue: "
|
||||
<< clErrorToStr(err) << "\n";
|
||||
clReleaseMemObject(buf);
|
||||
return false;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(q, buf, CL_TRUE, 0, bufSize, host.data(), 0, nullptr, nullptr);
|
||||
clFinish(q);
|
||||
|
||||
bool ok = (err == CL_SUCCESS);
|
||||
|
||||
if(!ok) {
|
||||
std::cerr << " clEnqueueWriteBuffer failed: " << clErrorToStr(err) << "\n";
|
||||
}
|
||||
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseMemObject(buf);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_uint numPlatforms = 0;
|
||||
clGetPlatformIDs(0, nullptr, &numPlatforms);
|
||||
|
||||
if(numPlatforms == 0){
|
||||
std::cout << "No OpenCL platforms.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::vector<cl_platform_id> plats(numPlatforms);
|
||||
clGetPlatformIDs(numPlatforms, plats.data(), nullptr);
|
||||
|
||||
for(cl_uint p = 0; p < numPlatforms; ++p)
|
||||
{
|
||||
char buf[256];
|
||||
|
||||
clGetPlatformInfo(plats[p], CL_PLATFORM_NAME, sizeof(buf), buf, nullptr);
|
||||
std::cout << "Platform: " << buf << "\n";
|
||||
|
||||
cl_uint numDevs = 0;
|
||||
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevs);
|
||||
|
||||
if(numDevs == 0) {
|
||||
std::cout << " No devices found on this platform.\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<cl_device_id> devs(numDevs);
|
||||
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, numDevs, devs.data(), nullptr);
|
||||
|
||||
for(cl_uint d = 0; d < numDevs; ++d)
|
||||
{
|
||||
clGetDeviceInfo(devs[d], CL_DEVICE_NAME, sizeof(buf), buf, nullptr);
|
||||
std::cout << " Device: " << buf << "\n";
|
||||
|
||||
// Create a context for this device
|
||||
cl_int err;
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &devs[d], nullptr, nullptr, &err);
|
||||
|
||||
if(err != CL_SUCCESS) {
|
||||
std::cout << " Failed to create context: "
|
||||
<< clErrorToStr(err) << "\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
bool supported = testUseHostPtr(ctx, devs[d]);
|
||||
|
||||
if(supported)
|
||||
std::cout << " HOST_PTR appears supported.\n";
|
||||
else
|
||||
std::cout << " HOST_PTR appears NOT supported.\n";
|
||||
|
||||
clReleaseContext(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
||||
// Terminate the process with exit code 1 when `err` is not CL_SUCCESS,
// after printing the numeric code and the caller-supplied description.
void checkCLError(cl_int err, const char* msg) {
    if (err == CL_SUCCESS) {
        return;  // nothing to report
    }
    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
    exit(1);
}
|
||||
|
||||
int main() {
|
||||
cl_uint numPlatforms = 0;
|
||||
checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
|
||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||
checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
|
||||
|
||||
std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
|
||||
|
||||
for (cl_uint p = 0; p < numPlatforms; ++p) {
|
||||
char platformName[256];
|
||||
clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
|
||||
std::cout << "Platform " << p << ": " << platformName << "\n";
|
||||
|
||||
cl_uint numDevices = 0;
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
|
||||
std::vector<cl_device_id> devices(numDevices);
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
|
||||
|
||||
for (cl_uint d = 0; d < numDevices; ++d) {
|
||||
char deviceName[256];
|
||||
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
|
||||
std::cout << " Device " << d << ": " << deviceName << "\n";
|
||||
|
||||
cl_bool unifiedMem = CL_FALSE;
|
||||
clGetDeviceInfo(devices[d], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unifiedMem), &unifiedMem, nullptr);
|
||||
std::cout << " Host-Device unified memory: " << (unifiedMem ? "Yes" : "No") << "\n";
|
||||
|
||||
#ifdef CL_DEVICE_SVM_CAPABILITIES
|
||||
cl_device_svm_capabilities svmCaps = 0;
|
||||
clGetDeviceInfo(devices[d], CL_DEVICE_SVM_CAPABILITIES, sizeof(svmCaps), &svmCaps, nullptr);
|
||||
std::cout << " SVM capabilities:\n";
|
||||
if (!svmCaps) std::cout << " None\n";
|
||||
if (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER)
|
||||
std::cout << " - Coarse-grain buffer sharing\n";
|
||||
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)
|
||||
std::cout << " - Fine-grain buffer sharing\n";
|
||||
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)
|
||||
std::cout << " - Fine-grain system sharing\n";
|
||||
if (svmCaps & CL_DEVICE_SVM_ATOMICS)
|
||||
std::cout << " - Atomics supported\n";
|
||||
#endif
|
||||
|
||||
// Optional runtime test: check if CL_MEM_USE_HOST_PTR buffer reuses pointer
|
||||
const size_t bufSize = 1024 * 1024;
|
||||
std::vector<char> hostBuffer(bufSize, 42);
|
||||
|
||||
cl_int err;
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
||||
checkCLError(err, "create context");
|
||||
|
||||
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
|
||||
checkCLError(err, "create buffer");
|
||||
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
|
||||
checkCLError(err, "create queue");
|
||||
|
||||
// Simple host → device → host round-trip test
|
||||
cl_event evt;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
|
||||
checkCLError(err, "map buffer");
|
||||
clWaitForEvents(1, &evt);
|
||||
|
||||
clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr);
|
||||
clReleaseMemObject(buf);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double, std::milli> elapsed = end - start;
|
||||
std::cout << " Map latency: " << elapsed.count() << " ms (lower → likely zero-copy)\n";
|
||||
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseContext(ctx);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,184 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
||||
// Guard for OpenCL status codes: on anything other than CL_SUCCESS, print
// the numeric code with the caller's description and exit with status 1.
void checkCLError(cl_int err, const char* msg) {
    const bool failed = (err != CL_SUCCESS);
    if (!failed) {
        return;
    }
    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
    exit(1);
}
|
||||
|
||||
// --------------------
// Kernel source
// Simple mock kernel that simulates splitting XYZ/I.
// Each "point" is 16 bytes (12 bytes XYZ + 4 bytes intensity); work-item
// `gid` copies point `gid` into the two output buffers and does nothing
// when gid >= numPoints.
// The text between R"CLC( and )CLC" is passed verbatim to the OpenCL
// compiler, so it must contain nothing but OpenCL C.
const char* kernelSrc = R"CLC(
__kernel void xyz_i_split(__global uchar* assembly,
                          __global uchar* xyzOut,
                          __global uchar* iOut,
                          const uint numPoints) {
    uint gid = get_global_id(0);
    if (gid >= numPoints) return;

    uint offset = gid * 16;
    // Copy XYZ (12 bytes) to xyzOut
    for (int i=0; i<12; ++i)
        xyzOut[gid*12 + i] = assembly[offset + i];

    // Copy Intensity (4 bytes) to iOut
    for (int i=0; i<4; ++i)
        iOut[gid*4 + i] = assembly[offset + 12 + i];
}
)CLC";
|
||||
|
||||
int main() {
|
||||
// --------------------
|
||||
// CHANGE THIS VALUE to set number of points per assembly buffer
|
||||
const size_t numPointsPerAssembly = 100000; // e.g., ~3333 points per fill
|
||||
const size_t bytesPerPoint = 16; // 12 bytes XYZ + 4 bytes I
|
||||
|
||||
const size_t assemblyBufSize = numPointsPerAssembly * bytesPerPoint;
|
||||
const size_t xyzBufSize = numPointsPerAssembly * 12;
|
||||
const size_t iBufSize = numPointsPerAssembly * 4;
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
|
||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||
checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
|
||||
|
||||
std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
|
||||
|
||||
for (cl_uint p = 0; p < numPlatforms; ++p) {
|
||||
char platformName[256];
|
||||
clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
|
||||
std::cout << "Platform " << p << ": " << platformName << "\n";
|
||||
|
||||
cl_uint numDevices = 0;
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
|
||||
std::vector<cl_device_id> devices(numDevices);
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
|
||||
|
||||
for (cl_uint d = 0; d < numDevices; ++d) {
|
||||
char deviceName[256];
|
||||
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
|
||||
std::cout << " Device " << d << ": " << deviceName << "\n";
|
||||
|
||||
cl_int err;
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
||||
checkCLError(err, "create context");
|
||||
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
|
||||
checkCLError(err, "create queue");
|
||||
|
||||
// --------------------
|
||||
// Allocate host buffers
|
||||
std::vector<unsigned char> assemblyHost(assemblyBufSize, 42);
|
||||
std::vector<unsigned char> xyzHost(xyzBufSize, 0);
|
||||
std::vector<unsigned char> iHost(iBufSize, 0);
|
||||
|
||||
std::vector<unsigned char> xyzHostCPU(xyzBufSize, 0);
|
||||
std::vector<unsigned char> iHostCPU(iBufSize, 0);
|
||||
|
||||
// Create CL buffers
|
||||
cl_mem assemblyBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, assemblyBufSize, assemblyHost.data(), &err);
|
||||
checkCLError(err, "create assembly buffer");
|
||||
cl_mem xyzBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, xyzBufSize, xyzHost.data(), &err);
|
||||
checkCLError(err, "create xyz buffer");
|
||||
cl_mem iBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, iBufSize, iHost.data(), &err);
|
||||
checkCLError(err, "create i buffer");
|
||||
|
||||
// Build program
|
||||
cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, nullptr, &err);
|
||||
checkCLError(err, "create program");
|
||||
|
||||
err = clBuildProgram(prog, 1, &devices[d], nullptr, nullptr, nullptr);
|
||||
if (err != CL_SUCCESS) {
|
||||
// Print build log
|
||||
size_t logSize = 0;
|
||||
clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
|
||||
std::vector<char> log(logSize);
|
||||
clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
|
||||
std::cerr << log.data() << "\n";
|
||||
}
|
||||
checkCLError(err, "build program");
|
||||
|
||||
cl_kernel kernel = clCreateKernel(prog, "xyz_i_split", &err);
|
||||
checkCLError(err, "create kernel");
|
||||
|
||||
// Set kernel args
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &assemblyBuf);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &xyzBuf);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &iBuf);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_uint), &numPointsPerAssembly);
|
||||
|
||||
const size_t globalWorkSize = numPointsPerAssembly;
|
||||
|
||||
// --------------------
|
||||
// Run a few iterations
|
||||
for (int iter = 0; iter < 10; ++iter) {
|
||||
cl_event evt;
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
void* mappedAssembly = clEnqueueMapBuffer(q, assemblyBuf, CL_TRUE, CL_MAP_READ, 0, assemblyBufSize, 0, nullptr, &evt, &err);
|
||||
checkCLError(err, "map assembly buffer");
|
||||
clWaitForEvents(1, &evt);
|
||||
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
err = clEnqueueNDRangeKernel(q, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &evt);
|
||||
checkCLError(err, "enqueue kernel");
|
||||
clWaitForEvents(1, &evt);
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
cl_event unmapEvt;
|
||||
err = clEnqueueUnmapMemObject(q, assemblyBuf, mappedAssembly, 0, nullptr, &unmapEvt);
|
||||
checkCLError(err, "unmap assembly buffer");
|
||||
clWaitForEvents(1, &unmapEvt);
|
||||
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// --------------------
|
||||
// Host CPU split
|
||||
auto cpuStart = std::chrono::high_resolution_clock::now();
|
||||
for (size_t pt = 0; pt < numPointsPerAssembly; ++pt) {
|
||||
size_t off = pt * 16;
|
||||
for (int i = 0; i < 12; ++i)
|
||||
xyzHostCPU[pt*12 + i] = assemblyHost[off + i];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
iHostCPU[pt*4 + i] = assemblyHost[off + 12 + i];
|
||||
}
|
||||
auto cpuEnd = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::chrono::duration<double, std::milli> mapElapsed = t1 - t0;
|
||||
std::chrono::duration<double, std::milli> kernelElapsed = t2 - t1;
|
||||
std::chrono::duration<double, std::milli> unmapElapsed = t3 - t2;
|
||||
std::chrono::duration<double, std::milli> cpuElapsed = cpuEnd - cpuStart;
|
||||
|
||||
std::cout << "Iteration " << iter
|
||||
<< " | Map: " << mapElapsed.count()
|
||||
<< " ms | Kernel: " << kernelElapsed.count()
|
||||
<< " ms | Unmap: " << unmapElapsed.count()
|
||||
<< " ms | CPU Split: " << cpuElapsed.count() << " ms\n";
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(prog);
|
||||
clReleaseMemObject(assemblyBuf);
|
||||
clReleaseMemObject(xyzBuf);
|
||||
clReleaseMemObject(iBuf);
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseContext(ctx);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
// Report-and-exit helper: when `err` differs from CL_SUCCESS, write the
// numeric code and `msg` to stderr and end the process with status 1.
void checkCLError(cl_int err, const char* msg) {
    if (err == CL_SUCCESS)
        return;

    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
    exit(1);
}
|
||||
|
||||
// Callback context for waiting on events
|
||||
struct CallbackContext {
|
||||
std::mutex mtx;
|
||||
std::condition_variable cv;
|
||||
bool completed;
|
||||
cl_int status;
|
||||
std::chrono::high_resolution_clock::time_point* timestamp;
|
||||
};
|
||||
|
||||
// Helper function to wait for callback completion
|
||||
void waitForCallback(CallbackContext& ctx) {
|
||||
std::unique_lock<std::mutex> lock(ctx.mtx);
|
||||
ctx.cv.wait(lock, [&ctx] { return ctx.completed; });
|
||||
std::cout <<"waitForCallback cv.wait() returned.\n";
|
||||
}
|
||||
|
||||
// Static callback for map buffer event
|
||||
void CL_CALLBACK mapEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
|
||||
CallbackContext* ctx = static_cast<CallbackContext*>(user_data);
|
||||
std::cout <<"mapEventCallback called and about to lock mutex.\n";
|
||||
std::unique_lock<std::mutex> lock(ctx->mtx);
|
||||
ctx->status = event_command_exec_status;
|
||||
if (ctx->timestamp) {
|
||||
*ctx->timestamp = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
ctx->completed = true;
|
||||
ctx->cv.notify_one();
|
||||
std::cout <<"mapEventCallback just notified.\n";
|
||||
}
|
||||
|
||||
// Static callback for kernel execution event
|
||||
void CL_CALLBACK kernelEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
|
||||
CallbackContext* ctx = static_cast<CallbackContext*>(user_data);
|
||||
std::cout <<"mapEventCallback called and about to lock mutex.\n";
|
||||
std::unique_lock<std::mutex> lock(ctx->mtx);
|
||||
ctx->status = event_command_exec_status;
|
||||
if (ctx->timestamp) {
|
||||
*ctx->timestamp = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
ctx->completed = true;
|
||||
ctx->cv.notify_one();
|
||||
std::cout <<"mapEventCallback just notified.\n";
|
||||
}
|
||||
|
||||
// Static callback for unmap buffer event
|
||||
void CL_CALLBACK unmapEventCallback(cl_event /*event*/, cl_int event_command_exec_status, void* user_data) {
|
||||
CallbackContext* ctx = static_cast<CallbackContext*>(user_data);
|
||||
std::cout <<"mapEventCallback called and about to lock mutex.\n";
|
||||
std::unique_lock<std::mutex> lock(ctx->mtx);
|
||||
ctx->status = event_command_exec_status;
|
||||
if (ctx->timestamp) {
|
||||
*ctx->timestamp = std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
ctx->completed = true;
|
||||
ctx->cv.notify_one();
|
||||
std::cout <<"mapEventCallback just notified.\n";
|
||||
}
|
||||
|
||||
// --------------------
// Kernel source
// Simple mock kernel that simulates splitting XYZ/I.
// Each "point" is 16 bytes (12 bytes XYZ + 4 bytes intensity); work-item
// `gid` copies point `gid` into the two output buffers and does nothing
// when gid >= numPoints.
// The text between R"CLC( and )CLC" is passed verbatim to the OpenCL
// compiler, so it must contain nothing but OpenCL C.
const char* kernelSrc = R"CLC(
__kernel void xyz_i_split(__global uchar* assembly,
                          __global uchar* xyzOut,
                          __global uchar* iOut,
                          const uint numPoints) {
    uint gid = get_global_id(0);
    if (gid >= numPoints) return;

    uint offset = gid * 16;
    // Copy XYZ (12 bytes) to xyzOut
    for (int i=0; i<12; ++i)
        xyzOut[gid*12 + i] = assembly[offset + i];

    // Copy Intensity (4 bytes) to iOut
    for (int i=0; i<4; ++i)
        iOut[gid*4 + i] = assembly[offset + 12 + i];
}
)CLC";
|
||||
|
||||
int main() {
|
||||
// --------------------
|
||||
// CHANGE THIS VALUE to set number of points per assembly buffer
|
||||
const size_t numPointsPerAssembly = 100000; // e.g., ~3333 points per fill
|
||||
const size_t bytesPerPoint = 16; // 12 bytes XYZ + 4 bytes I
|
||||
|
||||
const size_t assemblyBufSize = numPointsPerAssembly * bytesPerPoint;
|
||||
const size_t xyzBufSize = numPointsPerAssembly * 12;
|
||||
const size_t iBufSize = numPointsPerAssembly * 4;
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
|
||||
std::vector<cl_platform_id> platforms(numPlatforms);
|
||||
checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
|
||||
|
||||
std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
|
||||
|
||||
for (cl_uint p = 0; p < numPlatforms; ++p) {
|
||||
char platformName[256];
|
||||
clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
|
||||
std::cout << "Platform " << p << ": " << platformName << "\n";
|
||||
|
||||
cl_uint numDevices = 0;
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
|
||||
std::vector<cl_device_id> devices(numDevices);
|
||||
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
|
||||
|
||||
for (cl_uint d = 0; d < numDevices; ++d) {
|
||||
char deviceName[256];
|
||||
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
|
||||
std::cout << " Device " << d << ": " << deviceName << "\n";
|
||||
|
||||
cl_int err;
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
||||
checkCLError(err, "create context");
|
||||
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
|
||||
checkCLError(err, "create queue");
|
||||
|
||||
// --------------------
|
||||
// Allocate host buffers
|
||||
std::vector<unsigned char> assemblyHost(assemblyBufSize, 42);
|
||||
std::vector<unsigned char> xyzHost(xyzBufSize, 0);
|
||||
std::vector<unsigned char> iHost(iBufSize, 0);
|
||||
|
||||
std::vector<unsigned char> xyzHostCPU(xyzBufSize, 0);
|
||||
std::vector<unsigned char> iHostCPU(iBufSize, 0);
|
||||
|
||||
// Create CL buffers
|
||||
cl_mem assemblyBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, assemblyBufSize, assemblyHost.data(), &err);
|
||||
checkCLError(err, "create assembly buffer");
|
||||
cl_mem xyzBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, xyzBufSize, xyzHost.data(), &err);
|
||||
checkCLError(err, "create xyz buffer");
|
||||
cl_mem iBuf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, iBufSize, iHost.data(), &err);
|
||||
checkCLError(err, "create i buffer");
|
||||
|
||||
// Build program
|
||||
cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, nullptr, &err);
|
||||
checkCLError(err, "create program");
|
||||
|
||||
err = clBuildProgram(prog, 1, &devices[d], nullptr, nullptr, nullptr);
|
||||
if (err != CL_SUCCESS) {
|
||||
// Print build log
|
||||
size_t logSize = 0;
|
||||
clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
|
||||
std::vector<char> log(logSize);
|
||||
clGetProgramBuildInfo(prog, devices[d], CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
|
||||
std::cerr << log.data() << "\n";
|
||||
}
|
||||
checkCLError(err, "build program");
|
||||
|
||||
cl_kernel kernel = clCreateKernel(prog, "xyz_i_split", &err);
|
||||
checkCLError(err, "create kernel");
|
||||
|
||||
// Set kernel args
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &assemblyBuf);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &xyzBuf);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &iBuf);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_uint), &numPointsPerAssembly);
|
||||
|
||||
const size_t globalWorkSize = numPointsPerAssembly;
|
||||
|
||||
// --------------------
|
||||
// Run a few iterations
|
||||
for (int iter = 0; iter < 10; ++iter) {
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::high_resolution_clock::time_point t1, t2, t3;
|
||||
|
||||
cl_event mapEvt;
|
||||
void* mappedAssembly = clEnqueueMapBuffer(q, assemblyBuf, CL_FALSE, CL_MAP_READ, 0, assemblyBufSize, 0, nullptr, &mapEvt, &err);
|
||||
checkCLError(err, "map assembly buffer");
|
||||
|
||||
// Retain event to keep it alive until callback completes
|
||||
err = clRetainEvent(mapEvt);
|
||||
checkCLError(err, "retain map event");
|
||||
|
||||
// Wait for map event using callback
|
||||
CallbackContext mapCtx;
|
||||
mapCtx.completed = false;
|
||||
mapCtx.timestamp = &t1;
|
||||
err = clSetEventCallback(mapEvt, CL_COMPLETE, mapEventCallback, &mapCtx);
|
||||
checkCLError(err, "set map event callback");
|
||||
// Force queue flush to ensure event processing
|
||||
err = clFlush(q);
|
||||
checkCLError(err, "flush queue");
|
||||
std::cout <<"About to waitForCalllback for clEnqueueMapBuffer.\n";
|
||||
waitForCallback(mapCtx);
|
||||
checkCLError(mapCtx.status, "map buffer");
|
||||
|
||||
// Release event after callback completes
|
||||
err = clReleaseEvent(mapEvt);
|
||||
checkCLError(err, "release map event");
|
||||
|
||||
cl_event kernelEvt;
|
||||
err = clEnqueueNDRangeKernel(q, kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, &kernelEvt);
|
||||
checkCLError(err, "enqueue kernel");
|
||||
|
||||
// Retain event to keep it alive until callback completes
|
||||
err = clRetainEvent(kernelEvt);
|
||||
checkCLError(err, "retain kernel event");
|
||||
|
||||
// Wait for kernel event using callback
|
||||
CallbackContext kernelCtx;
|
||||
kernelCtx.completed = false;
|
||||
kernelCtx.timestamp = &t2;
|
||||
err = clSetEventCallback(kernelEvt, CL_COMPLETE, kernelEventCallback, &kernelCtx);
|
||||
checkCLError(err, "set kernel event callback");
|
||||
// Force queue flush to ensure event processing
|
||||
err = clFlush(q);
|
||||
checkCLError(err, "flush queue");
|
||||
std::cout <<"About to waitForCalllback for clEnqueueNDRangeKernel.\n";
|
||||
waitForCallback(kernelCtx);
|
||||
checkCLError(kernelCtx.status, "kernel execution");
|
||||
|
||||
// Release event after callback completes
|
||||
err = clReleaseEvent(kernelEvt);
|
||||
checkCLError(err, "release kernel event");
|
||||
|
||||
cl_event unmapEvt;
|
||||
err = clEnqueueUnmapMemObject(q, assemblyBuf, mappedAssembly, 0, nullptr, &unmapEvt);
|
||||
checkCLError(err, "unmap assembly buffer");
|
||||
|
||||
// Retain event to keep it alive until callback completes
|
||||
err = clRetainEvent(unmapEvt);
|
||||
checkCLError(err, "retain unmap event");
|
||||
|
||||
// Wait for unmap event using callback
|
||||
CallbackContext unmapCtx;
|
||||
unmapCtx.completed = false;
|
||||
unmapCtx.timestamp = &t3;
|
||||
err = clSetEventCallback(unmapEvt, CL_COMPLETE, unmapEventCallback, &unmapCtx);
|
||||
checkCLError(err, "set unmap event callback");
|
||||
// Force queue flush to ensure event processing
|
||||
err = clFlush(q);
|
||||
checkCLError(err, "flush queue");
|
||||
std::cout <<"About to waitForCalllback for clEnqueueUnmapMemObject.\n";
|
||||
waitForCallback(unmapCtx);
|
||||
checkCLError(unmapCtx.status, "unmap buffer");
|
||||
|
||||
// Release event after callback completes
|
||||
err = clReleaseEvent(unmapEvt);
|
||||
checkCLError(err, "release unmap event");
|
||||
|
||||
// --------------------
|
||||
// Host CPU split
|
||||
auto cpuStart = std::chrono::high_resolution_clock::now();
|
||||
for (size_t pt = 0; pt < numPointsPerAssembly; ++pt) {
|
||||
size_t off = pt * 16;
|
||||
for (int i = 0; i < 12; ++i)
|
||||
xyzHostCPU[pt*12 + i] = assemblyHost[off + i];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
iHostCPU[pt*4 + i] = assemblyHost[off + 12 + i];
|
||||
}
|
||||
auto cpuEnd = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::chrono::duration<double, std::milli> mapElapsed = t1 - t0;
|
||||
std::chrono::duration<double, std::milli> kernelElapsed = t2 - t1;
|
||||
std::chrono::duration<double, std::milli> unmapElapsed = t3 - t2;
|
||||
std::chrono::duration<double, std::milli> cpuElapsed = cpuEnd - cpuStart;
|
||||
|
||||
std::cout << "Iteration " << iter
|
||||
<< " | Map: " << mapElapsed.count()
|
||||
<< " ms | Kernel: " << kernelElapsed.count()
|
||||
<< " ms | Unmap: " << unmapElapsed.count()
|
||||
<< " ms | CPU Split: " << cpuElapsed.count() << " ms\n";
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(prog);
|
||||
clReleaseMemObject(assemblyBuf);
|
||||
clReleaseMemObject(xyzBuf);
|
||||
clReleaseMemObject(iBuf);
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseContext(ctx);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,117 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
// CHECK(err, msg): if the OpenCL status `err` is not CL_SUCCESS, print `msg`
// together with the numeric code to stderr and `return 1` from the enclosing
// function (so it is only usable inside functions returning int).
// `err` is evaluated exactly once: callers pass whole API calls as `err`,
// and re-evaluating the expression for the message would run the failing
// call a second time. The do/while(0) wrapper makes the macro behave as a
// single statement that is terminated by the caller's semicolon.
#define CHECK(err, msg)                                                      \
    do {                                                                     \
        const auto check_status_ = (err);                                    \
        if (check_status_ != CL_SUCCESS) {                                   \
            std::cerr << "ERROR: " << msg << " (" << check_status_ << ")\n"; \
            return 1;                                                        \
        }                                                                    \
    } while (0)
|
||||
|
||||
// OpenCL C source of the check kernel: every work-item writes in[gid] + 42
// to out[gid]. The kernel performs no bounds check, so the global work size
// must not exceed the element count of either buffer.
// The text between R"CLC( and )CLC" is passed verbatim to the OpenCL
// compiler, so it must contain nothing but OpenCL C.
const char *kernelSrc = R"CLC(
__kernel void check_shared(__global const int* in, __global int* out) {
    int gid = get_global_id(0);
    out[gid] = in[gid] + 42; // simple deterministic transform
}
)CLC";
|
||||
|
||||
int main() {
|
||||
cl_int err;
|
||||
|
||||
// Pick first available device
|
||||
cl_uint numPlatforms;
|
||||
CHECK(clGetPlatformIDs(0, nullptr, &numPlatforms), "clGetPlatformIDs count");
|
||||
std::vector<cl_platform_id> plats(numPlatforms);
|
||||
CHECK(clGetPlatformIDs(numPlatforms, plats.data(), nullptr), "clGetPlatformIDs");
|
||||
|
||||
cl_platform_id plat = plats[0];
|
||||
cl_device_id dev;
|
||||
CHECK(clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, nullptr), "clGetDeviceIDs");
|
||||
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
|
||||
CHECK(err, "clCreateContext");
|
||||
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
|
||||
CHECK(err, "clCreateCommandQueueWithProperties");
|
||||
|
||||
// Create program and kernel
|
||||
const size_t srcLen = std::strlen(kernelSrc);
|
||||
cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, &srcLen, &err);
|
||||
CHECK(err, "clCreateProgramWithSource");
|
||||
|
||||
err = clBuildProgram(prog, 1, &dev, nullptr, nullptr, nullptr);
|
||||
if (err != CL_SUCCESS) {
|
||||
size_t logSize;
|
||||
clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
|
||||
std::vector<char> log(logSize);
|
||||
clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
|
||||
std::cerr << "--- Build Log ---\n" << log.data() << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
cl_kernel krn = clCreateKernel(prog, "check_shared", &err);
|
||||
CHECK(err, "clCreateKernel");
|
||||
|
||||
const size_t N = 8;
|
||||
size_t bufSize = N * sizeof(int);
|
||||
|
||||
// Allocate host-visible buffer
|
||||
cl_mem bufIn = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
|
||||
CHECK(err, "clCreateBuffer input");
|
||||
cl_mem bufOut = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
|
||||
CHECK(err, "clCreateBuffer output");
|
||||
|
||||
// Map the buffer (should return pointer to real host memory if unified)
|
||||
int* hostPtr = (int*)clEnqueueMapBuffer(q, bufIn, CL_TRUE, CL_MAP_WRITE, 0, bufSize, 0, nullptr, nullptr, &err);
|
||||
CHECK(err, "clEnqueueMapBuffer");
|
||||
|
||||
std::cout << "Mapped host pointer: " << static_cast<void*>(hostPtr) << "\n";
|
||||
|
||||
// Write pattern directly into mapped memory
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
hostPtr[i] = 100 + i;
|
||||
|
||||
// No clEnqueueWriteBuffer call! We rely on shared memory.
|
||||
clEnqueueUnmapMemObject(q, bufIn, hostPtr, 0, nullptr, nullptr);
|
||||
clFinish(q);
|
||||
|
||||
// Set kernel args
|
||||
clSetKernelArg(krn, 0, sizeof(cl_mem), &bufIn);
|
||||
clSetKernelArg(krn, 1, sizeof(cl_mem), &bufOut);
|
||||
|
||||
size_t global = N;
|
||||
err = clEnqueueNDRangeKernel(q, krn, 1, nullptr, &global, nullptr, 0, nullptr, nullptr);
|
||||
CHECK(err, "clEnqueueNDRangeKernel");
|
||||
clFinish(q);
|
||||
|
||||
// Read back result
|
||||
int* outPtr = (int*)clEnqueueMapBuffer(q, bufOut, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, nullptr, &err);
|
||||
CHECK(err, "map output");
|
||||
|
||||
std::cout << "Result: ";
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
std::cout << outPtr[i] << " ";
|
||||
std::cout << "\n";
|
||||
|
||||
// Validate
|
||||
bool ok = true;
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
if (outPtr[i] != static_cast<int>(142 + i)) ok = false;
|
||||
|
||||
std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
|
||||
: "❌ GPU did not see host writes (copying or staging occurred)\n");
|
||||
|
||||
clEnqueueUnmapMemObject(q, bufOut, outPtr, 0, nullptr, nullptr);
|
||||
clFinish(q);
|
||||
|
||||
clReleaseMemObject(bufIn);
|
||||
clReleaseMemObject(bufOut);
|
||||
clReleaseKernel(krn);
|
||||
clReleaseProgram(prog);
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseContext(ctx);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,700 @@
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
#include <filesystem>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <pcl/PolygonMesh.h>
|
||||
#include <pcl/common/io.h>
|
||||
#include <pcl/features/normal_3d.h>
|
||||
#include <pcl/io/pcd_io.h>
|
||||
#include <pcl/io/vtk_io.h>
|
||||
#include <pcl/kdtree/kdtree_flann.h>
|
||||
#include <pcl/point_cloud.h>
|
||||
#include <pcl/point_types.h>
|
||||
#include <pcl/search/kdtree.h>
|
||||
#include <pcl/surface/gp3.h>
|
||||
#include <pcl/surface/organized_fast_mesh.h>
|
||||
|
||||
namespace {
|
||||
|
||||
// Surface-reconstruction algorithm selected on the command line
// ("ofm" / "gp3" in --algorithm).
enum class MeshAlgorithm
{
    Ofm = 0,  // organized fast mesh
    Gp3 = 1,  // greedy projection triangulation
};
|
||||
|
||||
struct ToolOptions
|
||||
{
|
||||
std::vector<std::filesystem::path> inputPaths;
|
||||
std::filesystem::path outputDirectory;
|
||||
bool hasOutputDirectory = false;
|
||||
MeshAlgorithm algorithm = MeshAlgorithm::Ofm;
|
||||
int ofmTrianglePixelSize = 1;
|
||||
float ofmMaxEdgeLength = 0.25f;
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TriangulationType ofmTriangulationType =
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TRIANGLE_ADAPTIVE_CUT;
|
||||
double gp3SearchRadius = 0.05;
|
||||
double gp3Mu = 2.5;
|
||||
int gp3MaxNeighbors = 100;
|
||||
double gp3MaxSurfaceAngleDeg = 45.0;
|
||||
double gp3MinAngleDeg = 10.0;
|
||||
double gp3MaxAngleDeg = 120.0;
|
||||
int gp3NormalK = 20;
|
||||
bool gp3NormalConsistency = false;
|
||||
};
|
||||
|
||||
struct ConversionStats
|
||||
{
|
||||
std::filesystem::path inputPath;
|
||||
std::filesystem::path outputPath;
|
||||
MeshAlgorithm algorithm = MeshAlgorithm::Ofm;
|
||||
std::size_t pointCount = 0;
|
||||
std::size_t finitePointCount = 0;
|
||||
std::size_t polygonCount = 0;
|
||||
double normalDurationMs = 0.0;
|
||||
double meshDurationMs = 0.0;
|
||||
double totalDurationMs = 0.0;
|
||||
bool success = false;
|
||||
std::string errorMessage;
|
||||
};
|
||||
|
||||
// Pi, used for degree-to-radian conversion.
constexpr double kPi{3.14159265358979323846};
// Smallest number of points that can be triangulated.
constexpr std::size_t kMinimumTriangulationPoints{3};
|
||||
|
||||
double degreesToRadians(double degrees)
|
||||
{
|
||||
return degrees * kPi / 180.0;
|
||||
}
|
||||
|
||||
std::string algorithmToString(MeshAlgorithm algorithm)
|
||||
{
|
||||
switch (algorithm)
|
||||
{
|
||||
case MeshAlgorithm::Ofm:
|
||||
return "ofm";
|
||||
case MeshAlgorithm::Gp3:
|
||||
return "gp3";
|
||||
}
|
||||
|
||||
throw std::runtime_error("Unsupported mesh algorithm enum");
|
||||
}
|
||||
|
||||
// Prints the one-line usage synopsis to stderr; argv0 is the program name
// shown after "Usage: ".
void printUsage(const char* argv0)
{
    // Option synopses in the order they appear in the usage line.
    static const char* const kOptionSynopses[] = {
        " [--algorithm ofm|gp3]",
        " [--output-dir DIR]",
        " [--ofm-triangle-pixel-size N]",
        " [--ofm-max-edge-length F]",
        " [--ofm-triangulation adaptive|left|right|quad]",
        " [--gp3-search-radius F]",
        " [--gp3-mu F]",
        " [--gp3-max-neighbors N]",
        " [--gp3-max-surface-angle-deg F]",
        " [--gp3-min-angle-deg F]",
        " [--gp3-max-angle-deg F]",
        " [--gp3-normal-k N]",
        " [--gp3-normal-consistency true|false]",
    };

    std::cerr << "Usage: " << argv0;
    for (const char* synopsis : kOptionSynopses)
    {
        std::cerr << synopsis;
    }
    std::cerr << " input1.pcd [input2.pcd ...]\n";
}
|
||||
|
||||
// Parses a boolean option value. Accepts exactly "true"/"1" and "false"/"0";
// anything else throws std::runtime_error naming the offending text.
bool parseBooleanValue(const std::string& value)
{
    const bool isTrue = (value == "true") || (value == "1");
    const bool isFalse = (value == "false") || (value == "0");

    if (!isTrue && !isFalse)
    {
        throw std::runtime_error(
            "Expected boolean value true|false|1|0, got: " + value);
    }

    return isTrue;
}
|
||||
|
||||
// Maps the --algorithm value ("ofm" or "gp3") to its enum value.
// Throws std::runtime_error for any other spelling.
MeshAlgorithm parseAlgorithm(const std::string& value)
{
    const bool wantsOfm = (value == "ofm");
    if (!wantsOfm && value != "gp3")
    {
        throw std::runtime_error("Unsupported algorithm: " + value);
    }

    return wantsOfm ? MeshAlgorithm::Ofm : MeshAlgorithm::Gp3;
}
|
||||
|
||||
bool parseTriangulationType(
|
||||
const std::string& value,
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TriangulationType& ofmTriangulationType)
|
||||
{
|
||||
if (value == "adaptive")
|
||||
{
|
||||
ofmTriangulationType =
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TRIANGLE_ADAPTIVE_CUT;
|
||||
return true;
|
||||
}
|
||||
if (value == "left")
|
||||
{
|
||||
ofmTriangulationType =
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TRIANGLE_LEFT_CUT;
|
||||
return true;
|
||||
}
|
||||
if (value == "right")
|
||||
{
|
||||
ofmTriangulationType =
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::TRIANGLE_RIGHT_CUT;
|
||||
return true;
|
||||
}
|
||||
if (value == "quad")
|
||||
{
|
||||
ofmTriangulationType =
|
||||
pcl::OrganizedFastMesh<pcl::PointXYZ>::QUAD_MESH;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Handles one command-line option (`arg`, which is argv[i]) and stores its
// value in `options`. Every supported option takes exactly one value, read
// from argv[i + 1]; `i` is advanced past that value.
//
// Throws std::runtime_error when the option is unknown, its value is missing,
// or the value is not valid for the option. Numeric values must be consumed
// completely, so input such as "12abc" is rejected instead of being silently
// truncated to 12.
void parseToolOption(
    ToolOptions& options,
    const std::string& arg,
    int& i,
    int argc,
    char** argv)
{
    // Returns the value following the current option and advances `i`.
    const auto requireValue = [&](const char* optionName) -> std::string {
        if (i + 1 >= argc)
        {
            throw std::runtime_error(std::string(optionName) + " requires a value");
        }
        return argv[++i];
    };

    // Reads the option's value and converts it with `convert`, which has the
    // std::stoi-style signature (text, size_t* consumed). Conversion failures
    // (std::invalid_argument / std::out_of_range, both std::logic_error) and
    // trailing characters are reported with the option name.
    const auto parseNumber = [&](const char* optionName, auto convert) {
        const std::string text = requireValue(optionName);
        std::size_t consumed = 0;
        bool parsed = false;
        decltype(convert(text, &consumed)) value{};
        try
        {
            value = convert(text, &consumed);
            parsed = (consumed == text.size());
        }
        catch (const std::logic_error&)
        {
            parsed = false;
        }
        if (!parsed)
        {
            throw std::runtime_error(
                std::string(optionName) + " expects a numeric value, got: " + text);
        }
        return value;
    };

    const auto toInt = [](const std::string& text, std::size_t* consumed) {
        return std::stoi(text, consumed);
    };
    const auto toFloat = [](const std::string& text, std::size_t* consumed) {
        return std::stof(text, consumed);
    };
    const auto toDouble = [](const std::string& text, std::size_t* consumed) {
        return std::stod(text, consumed);
    };

    if (arg == "--algorithm")
    {
        options.algorithm = parseAlgorithm(requireValue("--algorithm"));
        return;
    }
    if (arg == "--output-dir")
    {
        options.outputDirectory = requireValue("--output-dir");
        options.hasOutputDirectory = true;
        return;
    }
    if (arg == "--ofm-triangle-pixel-size")
    {
        options.ofmTrianglePixelSize =
            parseNumber("--ofm-triangle-pixel-size", toInt);
        return;
    }
    if (arg == "--ofm-max-edge-length")
    {
        options.ofmMaxEdgeLength = parseNumber("--ofm-max-edge-length", toFloat);
        return;
    }
    if (arg == "--ofm-triangulation")
    {
        const std::string triangulationValue = requireValue("--ofm-triangulation");
        if (!parseTriangulationType(
                triangulationValue, options.ofmTriangulationType))
        {
            throw std::runtime_error(
                "Unsupported triangulation type: " + triangulationValue);
        }
        return;
    }
    if (arg == "--gp3-search-radius")
    {
        options.gp3SearchRadius = parseNumber("--gp3-search-radius", toDouble);
        return;
    }
    if (arg == "--gp3-mu")
    {
        options.gp3Mu = parseNumber("--gp3-mu", toDouble);
        return;
    }
    if (arg == "--gp3-max-neighbors")
    {
        options.gp3MaxNeighbors = parseNumber("--gp3-max-neighbors", toInt);
        return;
    }
    if (arg == "--gp3-max-surface-angle-deg")
    {
        options.gp3MaxSurfaceAngleDeg =
            parseNumber("--gp3-max-surface-angle-deg", toDouble);
        return;
    }
    if (arg == "--gp3-min-angle-deg")
    {
        options.gp3MinAngleDeg = parseNumber("--gp3-min-angle-deg", toDouble);
        return;
    }
    if (arg == "--gp3-max-angle-deg")
    {
        options.gp3MaxAngleDeg = parseNumber("--gp3-max-angle-deg", toDouble);
        return;
    }
    if (arg == "--gp3-normal-k")
    {
        options.gp3NormalK = parseNumber("--gp3-normal-k", toInt);
        return;
    }
    if (arg == "--gp3-normal-consistency")
    {
        options.gp3NormalConsistency = parseBooleanValue(
            requireValue("--gp3-normal-consistency"));
        return;
    }

    throw std::runtime_error("Unknown option: " + arg);
}
|
||||
|
||||
// Builds ToolOptions from the command line. Arguments starting with '-' are
// handed to parseToolOption (which may also consume the following argument);
// everything else is recorded as an input path.
// Throws std::runtime_error when no input path was given.
ToolOptions parseArgs(int argc, char** argv)
{
    ToolOptions parsed;

    int index = 1;
    while (index < argc)
    {
        const std::string token = argv[index];
        const bool looksLikeOption = !token.empty() && token[0] == '-';

        if (looksLikeOption)
        {
            // May advance `index` past the option's value.
            parseToolOption(parsed, token, index, argc, argv);
        }
        else
        {
            parsed.inputPaths.emplace_back(token);
        }
        ++index;
    }

    if (parsed.inputPaths.empty())
    {
        throw std::runtime_error("At least one input .pcd file is required");
    }

    return parsed;
}
|
||||
|
||||
std::filesystem::path computeOutputPath(
|
||||
const ToolOptions& options,
|
||||
const std::filesystem::path& inputPath)
|
||||
{
|
||||
std::filesystem::path outputDirectory = options.hasOutputDirectory
|
||||
? options.outputDirectory
|
||||
: inputPath.parent_path();
|
||||
std::filesystem::path outputFileName = inputPath.filename();
|
||||
outputFileName.replace_extension(".vtk");
|
||||
return outputDirectory / outputFileName;
|
||||
}
|
||||
|
||||
// Loads a PCD file into a newly allocated XYZ point cloud.
// Throws std::runtime_error when the loader reports a non-zero status.
pcl::PointCloud<pcl::PointXYZ>::Ptr loadInputCloud(
    const std::filesystem::path& inputPath)
{
    using Cloud = pcl::PointCloud<pcl::PointXYZ>;

    Cloud::Ptr loaded(new Cloud());
    const int status = pcl::io::loadPCDFile(inputPath.string(), *loaded);
    if (status == 0)
    {
        return loaded;
    }

    throw std::runtime_error("Failed to load input PCD");
}
|
||||
|
||||
// Creates the directory that will contain `outputPath`, including any missing
// parents. Throws std::filesystem::filesystem_error if creation fails.
void ensureOutputDirectoryExists(const std::filesystem::path& outputPath)
{
    const std::filesystem::path parentDirectory = outputPath.parent_path();

    // A bare file name (e.g. "scan.vtk") has an empty parent path, meaning
    // the current directory. create_directories("") reports an error, so
    // there is nothing to create and nothing to call in that case.
    if (parentDirectory.empty())
    {
        return;
    }

    std::filesystem::create_directories(parentDirectory);
}
|
||||
|
||||
// Precondition check for the OFM path: throws std::runtime_error unless the
// cloud reports itself as organized.
void ensureOrganizedCloudForOfm(const pcl::PointCloud<pcl::PointXYZ>& cloud)
{
    if (cloud.isOrganized())
    {
        return;
    }

    throw std::runtime_error("Input point cloud is not organized");
}
|
||||
|
||||
// Returns a new single-row cloud holding only the points of `cloud` whose
// x, y and z are all finite, in their original order.
pcl::PointCloud<pcl::PointXYZ>::Ptr buildFinitePointCloud(
    const pcl::PointCloud<pcl::PointXYZ>& cloud)
{
    using Cloud = pcl::PointCloud<pcl::PointXYZ>;

    Cloud::Ptr filtered(new Cloud());
    filtered->reserve(cloud.size());

    for (std::size_t index = 0; index < cloud.points.size(); ++index)
    {
        const pcl::PointXYZ& candidate = cloud.points[index];
        const bool allFinite = std::isfinite(candidate.x) &&
            std::isfinite(candidate.y) && std::isfinite(candidate.z);
        if (allFinite)
        {
            filtered->push_back(candidate);
        }
    }

    // Describe the result as an unorganized (height 1) cloud.
    filtered->width = static_cast<std::uint32_t>(filtered->size());
    filtered->height = 1;
    filtered->is_dense = false;
    return filtered;
}
|
||||
|
||||
void ensureEnoughFinitePoints(
|
||||
const pcl::PointCloud<pcl::PointXYZ>& finiteCloud,
|
||||
int normalK)
|
||||
{
|
||||
if (finiteCloud.size() < kMinimumTriangulationPoints)
|
||||
{
|
||||
throw std::runtime_error("Too few finite points to triangulate");
|
||||
}
|
||||
|
||||
if (finiteCloud.size() <= static_cast<std::size_t>(normalK))
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Too few finite points for requested GP3 normal-k");
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates one normal per point of `finiteCloud` using a k-nearest-neighbour
// search over a kd-tree built on the same cloud, with the viewpoint set to
// the origin. Returns the computed normals cloud.
pcl::PointCloud<pcl::Normal>::Ptr estimateNormals(
    const pcl::PointCloud<pcl::PointXYZ>::Ptr& finiteCloud,
    int normalK)
{
    using Tree = pcl::search::KdTree<pcl::PointXYZ>;
    using NormalCloud = pcl::PointCloud<pcl::Normal>;

    Tree::Ptr neighbourTree(new Tree());
    neighbourTree->setInputCloud(finiteCloud);

    pcl::NormalEstimation<pcl::PointXYZ, pcl::Normal> estimator;
    estimator.setInputCloud(finiteCloud);
    estimator.setSearchMethod(neighbourTree);
    estimator.setKSearch(normalK);
    estimator.setViewPoint(0.0f, 0.0f, 0.0f);

    NormalCloud::Ptr result(new NormalCloud());
    estimator.compute(*result);
    return result;
}
|
||||
|
||||
// Combines the XYZ fields of `finiteCloud` with the fields of `normals` into
// a newly allocated PointNormal cloud.
pcl::PointCloud<pcl::PointNormal>::Ptr buildPointNormalsCloud(
    const pcl::PointCloud<pcl::PointXYZ>& finiteCloud,
    const pcl::PointCloud<pcl::Normal>& normals)
{
    using MergedCloud = pcl::PointCloud<pcl::PointNormal>;

    MergedCloud::Ptr merged(new MergedCloud());
    pcl::concatenateFields(finiteCloud, normals, *merged);
    return merged;
}
|
||||
|
||||
void ensureGp3NormalsAreFinite(
|
||||
const pcl::PointCloud<pcl::PointNormal>& pointNormals)
|
||||
{
|
||||
for (const auto& point : pointNormals.points)
|
||||
{
|
||||
if (std::isfinite(point.normal_x) && std::isfinite(point.normal_y) &&
|
||||
std::isfinite(point.normal_z))
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw std::runtime_error("Normal estimation produced no finite normals");
|
||||
}
|
||||
|
||||
void saveMeshOrThrow(
|
||||
const std::filesystem::path& outputPath,
|
||||
const pcl::PolygonMesh& mesh)
|
||||
{
|
||||
if (mesh.polygons.empty())
|
||||
{
|
||||
throw std::runtime_error("Reconstruction produced no polygons");
|
||||
}
|
||||
|
||||
if (pcl::io::saveVTKFile(outputPath.string(), mesh) != 0)
|
||||
{
|
||||
throw std::runtime_error("Failed to save output VTK");
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the OFM settings from `options` to `ofm` and attaches the input
// cloud; the viewpoint is fixed at the origin.
void configureOfm(
    pcl::OrganizedFastMesh<pcl::PointXYZ>& ofm,
    const ToolOptions& options,
    const pcl::PointCloud<pcl::PointXYZ>::Ptr& cloud)
{
    const Eigen::Vector3f origin = Eigen::Vector3f::Zero();

    ofm.setInputCloud(cloud);
    ofm.setTriangulationType(options.ofmTriangulationType);
    ofm.setTrianglePixelSize(options.ofmTrianglePixelSize);
    ofm.setViewpoint(origin);
    ofm.setMaxEdgeLength(options.ofmMaxEdgeLength);
}
|
||||
|
||||
// Applies the GP3 settings from `options` to `gp3` and attaches the input
// cloud and search tree. Angle options are given in degrees and converted to
// radians before being passed on.
void configureGp3(
    pcl::GreedyProjectionTriangulation<pcl::PointNormal>& gp3,
    const ToolOptions& options,
    const pcl::PointCloud<pcl::PointNormal>::Ptr& pointNormals,
    const pcl::search::KdTree<pcl::PointNormal>::Ptr& searchTree)
{
    gp3.setInputCloud(pointNormals);
    gp3.setSearchMethod(searchTree);
    gp3.setSearchRadius(options.gp3SearchRadius);
    gp3.setMu(options.gp3Mu);
    gp3.setMaximumNearestNeighbors(options.gp3MaxNeighbors);

    const double maxSurfaceAngleRad =
        degreesToRadians(options.gp3MaxSurfaceAngleDeg);
    gp3.setMaximumSurfaceAngle(maxSurfaceAngleRad);

    const double minAngleRad = degreesToRadians(options.gp3MinAngleDeg);
    gp3.setMinimumAngle(minAngleRad);

    const double maxAngleRad = degreesToRadians(options.gp3MaxAngleDeg);
    gp3.setMaximumAngle(maxAngleRad);

    gp3.setNormalConsistency(options.gp3NormalConsistency);
}
|
||||
|
||||
// Converts one PCD file to VTK with OrganizedFastMesh and returns the filled
// statistics. Only the reconstruct() call is timed; the total duration equals
// the mesh duration. Any failure (load, unorganized cloud, directory
// creation, empty mesh, save) propagates as an exception.
ConversionStats convertWithOfm(
    const ToolOptions& options,
    const std::filesystem::path& inputPath)
{
    using Clock = std::chrono::steady_clock;

    ConversionStats result;
    result.algorithm = MeshAlgorithm::Ofm;
    result.inputPath = inputPath;
    result.outputPath = computeOutputPath(options, inputPath);

    const auto inputCloud = loadInputCloud(inputPath);
    ensureOrganizedCloudForOfm(*inputCloud);
    ensureOutputDirectoryExists(result.outputPath);

    pcl::OrganizedFastMesh<pcl::PointXYZ> mesher;
    configureOfm(mesher, options, inputCloud);

    pcl::PolygonMesh mesh;
    const Clock::time_point reconstructBegin = Clock::now();
    mesher.reconstruct(mesh);
    const Clock::time_point reconstructEnd = Clock::now();

    saveMeshOrThrow(result.outputPath, mesh);

    const std::chrono::duration<double, std::milli> meshElapsed =
        reconstructEnd - reconstructBegin;

    result.pointCount = inputCloud->size();
    result.polygonCount = mesh.polygons.size();
    result.meshDurationMs = meshElapsed.count();
    result.totalDurationMs = result.meshDurationMs;
    result.success = true;
    return result;
}
|
||||
|
||||
// Converts one PCD file to VTK with greedy projection triangulation and
// returns the filled statistics. Non-finite points are dropped first; normal
// estimation and reconstruct() are timed separately and the total duration is
// their sum. Any failure propagates as an exception.
ConversionStats convertWithGp3(
    const ToolOptions& options,
    const std::filesystem::path& inputPath)
{
    using Clock = std::chrono::steady_clock;
    using Milliseconds = std::chrono::duration<double, std::milli>;
    using NormalTree = pcl::search::KdTree<pcl::PointNormal>;

    ConversionStats result;
    result.algorithm = MeshAlgorithm::Gp3;
    result.inputPath = inputPath;
    result.outputPath = computeOutputPath(options, inputPath);

    const auto inputCloud = loadInputCloud(inputPath);
    const auto finitePoints = buildFinitePointCloud(*inputCloud);
    ensureEnoughFinitePoints(*finitePoints, options.gp3NormalK);
    ensureOutputDirectoryExists(result.outputPath);

    const Clock::time_point normalsBegin = Clock::now();
    const auto normals = estimateNormals(finitePoints, options.gp3NormalK);
    const Clock::time_point normalsEnd = Clock::now();

    const auto pointsWithNormals = buildPointNormalsCloud(*finitePoints, *normals);
    ensureGp3NormalsAreFinite(*pointsWithNormals);

    NormalTree::Ptr neighbourTree(new NormalTree());
    neighbourTree->setInputCloud(pointsWithNormals);

    pcl::GreedyProjectionTriangulation<pcl::PointNormal> triangulator;
    configureGp3(triangulator, options, pointsWithNormals, neighbourTree);

    pcl::PolygonMesh mesh;
    const Clock::time_point reconstructBegin = Clock::now();
    triangulator.reconstruct(mesh);
    const Clock::time_point reconstructEnd = Clock::now();

    saveMeshOrThrow(result.outputPath, mesh);

    result.pointCount = inputCloud->size();
    result.finitePointCount = finitePoints->size();
    result.polygonCount = mesh.polygons.size();
    result.normalDurationMs = Milliseconds(normalsEnd - normalsBegin).count();
    result.meshDurationMs =
        Milliseconds(reconstructEnd - reconstructBegin).count();
    result.totalDurationMs = result.normalDurationMs + result.meshDurationMs;
    result.success = true;
    return result;
}
|
||||
|
||||
// Converts one file with the algorithm selected in `options`. Never lets a
// std::exception from the conversion escape: on failure it returns stats with
// `success` left false and `errorMessage` set to the exception text.
ConversionStats convertSingleFile(
    const ToolOptions& options,
    const std::filesystem::path& inputPath)
{
    try
    {
        return options.algorithm == MeshAlgorithm::Ofm
            ? convertWithOfm(options, inputPath)
            : convertWithGp3(options, inputPath);
    }
    catch (const std::exception& failure)
    {
        ConversionStats failed;
        failed.algorithm = options.algorithm;
        failed.inputPath = inputPath;
        failed.outputPath = computeOutputPath(options, inputPath);
        failed.errorMessage = failure.what();
        return failed;
    }
}
|
||||
|
||||
void printOfmStats(const ConversionStats& stats)
|
||||
{
|
||||
std::cout << std::fixed << std::setprecision(3)
|
||||
<< "OK"
|
||||
<< " algorithm=ofm"
|
||||
<< " input=" << stats.inputPath
|
||||
<< " output=" << stats.outputPath
|
||||
<< " points=" << stats.pointCount
|
||||
<< " polygons=" << stats.polygonCount
|
||||
<< " mesh_ms=" << stats.meshDurationMs
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
void printGp3Stats(const ConversionStats& stats)
|
||||
{
|
||||
std::cout << std::fixed << std::setprecision(3)
|
||||
<< "OK"
|
||||
<< " algorithm=gp3"
|
||||
<< " input=" << stats.inputPath
|
||||
<< " output=" << stats.outputPath
|
||||
<< " points=" << stats.pointCount
|
||||
<< " finite_points=" << stats.finitePointCount
|
||||
<< " polygons=" << stats.polygonCount
|
||||
<< " normal_ms=" << stats.normalDurationMs
|
||||
<< " mesh_ms=" << stats.meshDurationMs
|
||||
<< " total_ms=" << stats.totalDurationMs
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
void printPerFileStats(const ConversionStats& stats)
|
||||
{
|
||||
if (!stats.success)
|
||||
{
|
||||
std::cout << "FAIL"
|
||||
<< " algorithm=" << algorithmToString(stats.algorithm)
|
||||
<< " input=" << stats.inputPath
|
||||
<< " error=\"" << stats.errorMessage << "\"\n";
|
||||
return;
|
||||
}
|
||||
|
||||
if (stats.algorithm == MeshAlgorithm::Ofm)
|
||||
{
|
||||
printOfmStats(stats);
|
||||
return;
|
||||
}
|
||||
|
||||
printGp3Stats(stats);
|
||||
}
|
||||
|
||||
void printAggregateStats(const ToolOptions& options,
|
||||
const std::vector<ConversionStats>& statsList)
|
||||
{
|
||||
std::size_t successCount = 0;
|
||||
std::size_t failureCount = 0;
|
||||
double totalNormalMs = 0.0;
|
||||
double totalMeshMs = 0.0;
|
||||
double totalConversionMs = 0.0;
|
||||
double minTotalMs = std::numeric_limits<double>::max();
|
||||
double maxTotalMs = 0.0;
|
||||
|
||||
for (const auto& stats : statsList)
|
||||
{
|
||||
if (!stats.success)
|
||||
{
|
||||
++failureCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
++successCount;
|
||||
totalNormalMs += stats.normalDurationMs;
|
||||
totalMeshMs += stats.meshDurationMs;
|
||||
totalConversionMs += stats.totalDurationMs;
|
||||
minTotalMs = std::min(minTotalMs, stats.totalDurationMs);
|
||||
maxTotalMs = std::max(maxTotalMs, stats.totalDurationMs);
|
||||
}
|
||||
|
||||
double averageTotalMs = successCount == 0
|
||||
? 0.0
|
||||
: totalConversionMs / static_cast<double>(successCount);
|
||||
if (successCount == 0)
|
||||
{
|
||||
minTotalMs = 0.0;
|
||||
}
|
||||
|
||||
std::cout << std::fixed << std::setprecision(3)
|
||||
<< "SUMMARY"
|
||||
<< " algorithm=" << algorithmToString(options.algorithm)
|
||||
<< " processed=" << statsList.size()
|
||||
<< " succeeded=" << successCount
|
||||
<< " failed=" << failureCount;
|
||||
|
||||
if (options.algorithm == MeshAlgorithm::Gp3)
|
||||
{
|
||||
std::cout
|
||||
<< " total_normal_ms=" << totalNormalMs
|
||||
<< " total_mesh_ms=" << totalMeshMs
|
||||
<< " total_conversion_ms=" << totalConversionMs
|
||||
<< " avg_total_ms=" << averageTotalMs
|
||||
<< " min_total_ms=" << minTotalMs
|
||||
<< " max_total_ms=" << maxTotalMs
|
||||
<< "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout
|
||||
<< " total_mesh_ms=" << totalMeshMs
|
||||
<< " avg_mesh_ms=" << averageTotalMs
|
||||
<< " min_mesh_ms=" << minTotalMs
|
||||
<< " max_mesh_ms=" << maxTotalMs
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
ToolOptions options = parseArgs(argc, argv);
|
||||
std::vector<ConversionStats> statsList;
|
||||
statsList.reserve(options.inputPaths.size());
|
||||
|
||||
for (const auto& inputPath : options.inputPaths)
|
||||
{
|
||||
ConversionStats stats = convertSingleFile(options, inputPath);
|
||||
printPerFileStats(stats);
|
||||
statsList.push_back(std::move(stats));
|
||||
}
|
||||
|
||||
printAggregateStats(options, statsList);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
printUsage(argv[0]);
|
||||
std::cerr << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user