// Patch summary (preamble; text before the first "diff --git" header is not part of any hunk).
//
// compile/CMakeLists.txt
//   - Declares the COMPILE_CL_CHECKS option (default OFF).
//   - Locates OpenCL with find_package(OpenCL QUIET) and, when that fails, with
//     pkg_check_modules(OPENCL OpenCL); aborts with FATAL_ERROR if both fail.
//   - Copies the find_package results into OPENCL_INCLUDE_DIRS / OPENCL_LIBRARIES so
//     every target uses one set of variable names regardless of how OpenCL was found.
//   - Adds the clhostshmemptrcheck executable and switches the three existing targets
//     to the OPENCL_* variables.
// compile/clhostshmemptrcheck.cpp (new file)
//   - For every device of every platform: creates a context, creates a 1024-byte
//     CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE buffer, performs one blocking
//     clEnqueueWriteBuffer, and prints whether HOST_PTR "appears supported".
// compile/clshmemcheck.cpp, compile/clshmemlatency.cpp, compile/clzerocopycheck.cpp
//   - Define CL_TARGET_OPENCL_VERSION 300 and replace clCreateCommandQueue with
//     clCreateCommandQueueWithProperties using {CL_QUEUE_PROPERTIES, 0, 0}.
//   - clshmemcheck additionally unmaps the mapped pointer before releasing the buffer.
//   - clzerocopycheck wraps the expected value (142 + i) in a static_cast.
//
// NOTE(review): in this copy the #include targets and the template arguments
//   (std::vector, std::chrono::duration, static_cast) are not visible; consult the
//   original patch for them.
// NOTE(review): pkg_check_modules comes from CMake's FindPkgConfig module and no
//   find_package(PkgConfig) is visible in this hunk; confirm a parent CMakeLists loads it.
// NOTE(review): OPENCL_LIBRARY_DIRS is set here but no target in this hunk references it;
//   confirm linking works when OpenCL is found through pkg-config.
// NOTE(review): the new main() ignores the return codes of clGetPlatformIDs,
//   clGetPlatformInfo, clGetDeviceIDs and clGetDeviceInfo, and prints `buf` without
//   initializing it first; confirm whether that is acceptable for a diagnostic tool.
diff --git a/compile/CMakeLists.txt b/compile/CMakeLists.txt index 0d68400..6d35d97 100644 --- a/compile/CMakeLists.txt +++ b/compile/CMakeLists.txt @@ -1,19 +1,52 @@ -if(COMPILE_CL_CHECKS) - find_package(OpenCL REQUIRED) +option(COMPILE_CL_CHECKS "Compile CL checks" OFF) +if(COMPILE_CL_CHECKS) + # Find OpenCL: try find_package first, fall back to pkg-config + find_package(OpenCL QUIET) + if(OpenCL_FOUND) + # Normalize find_package variables to match pkg_check_modules naming + set(OPENCL_FOUND TRUE) + set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS}) + # Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural) + if(OpenCL_LIBRARIES) + set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES}) + else() + set(OPENCL_LIBRARIES ${OpenCL_LIBRARY}) + endif() + set(OPENCL_LIBRARY_DIRS "") + message(STATUS "Found OpenCL using find_package") + else() + # Fall back to pkg-config + pkg_check_modules(OPENCL OpenCL) + if(NOT OPENCL_FOUND) + message(FATAL_ERROR + "Failed to find OpenCL: both find_package and " + "pkg_check_modules failed. Try installing the " + "'ocl-icd-opencl-dev' package (or the appropriate " + "OpenCL development package for your system)." 
+ ) + endif() + message(STATUS "Found OpenCL using pkg-config") + endif() + + add_executable(clhostshmemptrcheck clhostshmemptrcheck.cpp) + target_include_directories(clhostshmemptrcheck + PUBLIC ${OPENCL_INCLUDE_DIRS}) + target_link_libraries(clhostshmemptrcheck + ${OPENCL_LIBRARIES}) add_executable(clshmemlatency clshmemlatency.cpp) target_include_directories(clshmemlatency - PUBLIC ${OpenCL_INCLUDE_DIRS}) + PUBLIC ${OPENCL_INCLUDE_DIRS}) target_link_libraries(clshmemlatency - ${OpenCL_LIBRARY}) + ${OPENCL_LIBRARIES}) add_executable(clshmemcheck clshmemcheck.cpp) target_include_directories(clshmemcheck - PUBLIC ${OpenCL_INCLUDE_DIRS}) + PUBLIC ${OPENCL_INCLUDE_DIRS}) target_link_libraries(clshmemcheck - ${OpenCL_LIBRARY}) + ${OPENCL_LIBRARIES}) add_executable(clzerocopycheck clzerocopycheck.cpp) target_include_directories(clzerocopycheck - PUBLIC ${OpenCL_INCLUDE_DIRS}) + PUBLIC ${OPENCL_INCLUDE_DIRS}) target_link_libraries(clzerocopycheck - ${OpenCL_LIBRARY}) + ${OPENCL_LIBRARIES}) endif() diff --git a/compile/clhostshmemptrcheck.cpp b/compile/clhostshmemptrcheck.cpp new file mode 100644 index 0000000..f41b0b3 --- /dev/null +++ b/compile/clhostshmemptrcheck.cpp @@ -0,0 +1,125 @@ +#define CL_TARGET_OPENCL_VERSION 300 +#include +#include +#include +#include + +static const char* clErrorToStr(cl_int err) +{ + switch(err) { + case CL_SUCCESS: return "CL_SUCCESS"; + case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; + case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; + case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; + case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; + case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + default: return "UNKNOWN_ERROR"; + } +} + +// Try creating a USE_HOST_PTR buffer on this device +bool testUseHostPtr(cl_context ctx, cl_device_id dev) +{ + const size_t bufSize = 1024; + std::vector host(bufSize, 0); + + cl_int err = 0; 
+ cl_mem buf = clCreateBuffer( + ctx, + CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, + bufSize, + host.data(), + &err + ); + + if(err != CL_SUCCESS) { + std::cerr << " clCreateBuffer(CL_MEM_USE_HOST_PTR) failed: " + << clErrorToStr(err) << "\n"; + return false; + } + + // Try to enqueue a trivial write to verify it works + cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; + cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err); + if(err != CL_SUCCESS){ + std::cerr << " Failed to create command queue: " + << clErrorToStr(err) << "\n"; + clReleaseMemObject(buf); + return false; + } + + err = clEnqueueWriteBuffer(q, buf, CL_TRUE, 0, bufSize, host.data(), 0, nullptr, nullptr); + clFinish(q); + + bool ok = (err == CL_SUCCESS); + + if(!ok) { + std::cerr << " clEnqueueWriteBuffer failed: " << clErrorToStr(err) << "\n"; + } + + clReleaseCommandQueue(q); + clReleaseMemObject(buf); + + return ok; +} + +int main() +{ + cl_uint numPlatforms = 0; + clGetPlatformIDs(0, nullptr, &numPlatforms); + + if(numPlatforms == 0){ + std::cout << "No OpenCL platforms.\n"; + return 0; + } + + std::vector plats(numPlatforms); + clGetPlatformIDs(numPlatforms, plats.data(), nullptr); + + for(cl_uint p = 0; p < numPlatforms; ++p) + { + char buf[256]; + + clGetPlatformInfo(plats[p], CL_PLATFORM_NAME, sizeof(buf), buf, nullptr); + std::cout << "Platform: " << buf << "\n"; + + cl_uint numDevs = 0; + clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevs); + + if(numDevs == 0) { + std::cout << " No devices found on this platform.\n"; + continue; + } + + std::vector devs(numDevs); + clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, numDevs, devs.data(), nullptr); + + for(cl_uint d = 0; d < numDevs; ++d) + { + clGetDeviceInfo(devs[d], CL_DEVICE_NAME, sizeof(buf), buf, nullptr); + std::cout << " Device: " << buf << "\n"; + + // Create a context for this device + cl_int err; + cl_context ctx = clCreateContext(nullptr, 1, &devs[d], nullptr, nullptr, &err); 
+ if(err != CL_SUCCESS) { + std::cout << " Failed to create context: " + << clErrorToStr(err) << "\n"; + continue; + } + + bool supported = testUseHostPtr(ctx, devs[d]); + + if(supported) + std::cout << " HOST_PTR appears supported.\n"; + else + std::cout << " HOST_PTR appears NOT supported.\n"; + + clReleaseContext(ctx); + } + } + + return 0; +} diff --git a/compile/clshmemcheck.cpp b/compile/clshmemcheck.cpp index 8e291c1..ca949a9 100644 --- a/compile/clshmemcheck.cpp +++ b/compile/clshmemcheck.cpp @@ -1,8 +1,10 @@ +#define CL_TARGET_OPENCL_VERSION 300 #include #include #include #include #include +#include void checkCLError(cl_int err, const char* msg) { if (err != CL_SUCCESS) { @@ -64,7 +66,8 @@ int main() { cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err); checkCLError(err, "create buffer"); - cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err); + cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; + cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err); checkCLError(err, "create queue"); // Simple host → device → host round-trip test @@ -72,10 +75,11 @@ int main() { auto start = std::chrono::high_resolution_clock::now(); - void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err); - checkCLError(err, "map buffer"); - clWaitForEvents(1, &evt); + void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err); + checkCLError(err, "map buffer"); + clWaitForEvents(1, &evt); + clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr); clReleaseMemObject(buf); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; diff --git a/compile/clshmemlatency.cpp b/compile/clshmemlatency.cpp index e233dad..9eb5439 100644 --- a/compile/clshmemlatency.cpp +++ b/compile/clshmemlatency.cpp @@ -1,8 +1,10 @@ +#define CL_TARGET_OPENCL_VERSION 300 #include 
#include #include #include #include +#include void checkCLError(cl_int err, const char* msg) { if (err != CL_SUCCESS) { @@ -70,7 +72,8 @@ int main() { cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err); checkCLError(err, "create context"); - cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err); + cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; + cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err); checkCLError(err, "create queue"); // -------------------- diff --git a/compile/clzerocopycheck.cpp b/compile/clzerocopycheck.cpp index 6b219e5..e198c71 100644 --- a/compile/clzerocopycheck.cpp +++ b/compile/clzerocopycheck.cpp @@ -1,3 +1,4 @@ +#define CL_TARGET_OPENCL_VERSION 300 #include #include #include @@ -32,8 +33,9 @@ int main() { cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err); CHECK(err, "clCreateContext"); - cl_command_queue q = clCreateCommandQueue(ctx, dev, 0, &err); - CHECK(err, "clCreateCommandQueue"); + cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; + cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err); + CHECK(err, "clCreateCommandQueueWithProperties"); // Create program and kernel const size_t srcLen = std::strlen(kernelSrc); @@ -97,7 +99,7 @@ int main() { // Validate bool ok = true; for (size_t i = 0; i < N; ++i) - if (outPtr[i] != 142 + i) ok = false; + if (outPtr[i] != static_cast(142 + i)) ok = false; std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n" : "❌ GPU did not see host writes (copying or staging occurred)\n");