CLTests: Add test for USE_HOST_PTR; fix build warnings

This commit is contained in:
2025-11-08 02:07:43 -04:00
parent bc56c83fad
commit b460c8b2d3
5 changed files with 183 additions and 16 deletions
+41 -8
View File
@@ -1,19 +1,52 @@
if(COMPILE_CL_CHECKS)
find_package(OpenCL REQUIRED)
option(COMPILE_CL_CHECKS "Compile CL checks" OFF)
if(COMPILE_CL_CHECKS)
# Find OpenCL: try find_package first, fall back to pkg-config.
# Either path leaves the result in the pkg-config style variables
# OPENCL_FOUND, OPENCL_INCLUDE_DIRS, OPENCL_LIBRARIES and OPENCL_LIBRARY_DIRS.
find_package(OpenCL QUIET)
if(OpenCL_FOUND)
    # Normalize find_package variables to match pkg_check_modules naming
    set(OPENCL_FOUND TRUE)
    set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS})
    # Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural)
    if(OpenCL_LIBRARIES)
        set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
    else()
        set(OPENCL_LIBRARIES ${OpenCL_LIBRARY})
    endif()
    set(OPENCL_LIBRARY_DIRS "")
    message(STATUS "Found OpenCL using find_package")
else()
    # Fall back to pkg-config. pkg_check_modules() only exists once the
    # PkgConfig module has been loaded, so load it here rather than relying
    # on an enclosing project having done so. If pkg-config itself is
    # missing, OPENCL_FOUND stays unset and the fatal error below fires.
    find_package(PkgConfig QUIET)
    if(PKG_CONFIG_FOUND)
        pkg_check_modules(OPENCL OpenCL)
    endif()
    if(NOT OPENCL_FOUND)
        message(FATAL_ERROR
            "Failed to find OpenCL: both find_package and "
            "pkg_check_modules failed. Try installing the "
            "'ocl-icd-opencl-dev' package (or the appropriate "
            "OpenCL development package for your system)."
        )
    endif()
    message(STATUS "Found OpenCL using pkg-config")
endif()
add_executable(clhostshmemptrcheck clhostshmemptrcheck.cpp)
target_include_directories(clhostshmemptrcheck
PUBLIC ${OPENCL_INCLUDE_DIRS})
target_link_libraries(clhostshmemptrcheck
${OPENCL_LIBRARIES})
add_executable(clshmemlatency clshmemlatency.cpp)
target_include_directories(clshmemlatency
PUBLIC ${OpenCL_INCLUDE_DIRS})
PUBLIC ${OPENCL_INCLUDE_DIRS})
target_link_libraries(clshmemlatency
${OpenCL_LIBRARY})
${OPENCL_LIBRARIES})
add_executable(clshmemcheck clshmemcheck.cpp)
target_include_directories(clshmemcheck
PUBLIC ${OpenCL_INCLUDE_DIRS})
PUBLIC ${OPENCL_INCLUDE_DIRS})
target_link_libraries(clshmemcheck
${OpenCL_LIBRARY})
${OPENCL_LIBRARIES})
add_executable(clzerocopycheck clzerocopycheck.cpp)
target_include_directories(clzerocopycheck
PUBLIC ${OpenCL_INCLUDE_DIRS})
PUBLIC ${OPENCL_INCLUDE_DIRS})
target_link_libraries(clzerocopycheck
${OpenCL_LIBRARY})
${OPENCL_LIBRARIES})
endif()
+125
View File
@@ -0,0 +1,125 @@
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <cstring>
/// Map an OpenCL status code to the name of its symbolic constant.
///
/// Covers the codes that the calls made in this file (clCreateContext,
/// clCreateBuffer, clCreateCommandQueueWithProperties, clEnqueueWriteBuffer)
/// are documented to return, so diagnostics name the real failure instead of
/// falling through to the generic string.
///
/// @param err Status code returned by an OpenCL API call.
/// @return Pointer to a string literal naming the code, or "UNKNOWN_ERROR"
///         for any code not listed here. The caller must not free it.
static const char* clErrorToStr(cl_int err)
{
    switch(err) {
        case CL_SUCCESS: return "CL_SUCCESS";
        case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
        case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
        case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
        case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
        case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
        case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
        case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
        case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
        case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
        case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
        case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
        case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
        case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
        case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
        case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
        case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
        case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
        case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
        default: return "UNKNOWN_ERROR";
    }
}
/// Probe whether a device accepts a buffer that wraps caller-owned host memory.
///
/// Creates a 1 KiB CL_MEM_USE_HOST_PTR buffer over a local allocation, then
/// issues one blocking write through a freshly created command queue.
/// Failures are reported on stderr.
///
/// @param ctx Context the buffer and queue are created in.
/// @param dev Device the command queue is created for.
/// @return true if buffer creation, queue creation and the write all
///         returned CL_SUCCESS; false otherwise.
bool testUseHostPtr(cl_context ctx, cl_device_id dev)
{
    const size_t kBytes = 1024;
    std::vector<char> backing(kBytes, 0);
    cl_int status = 0;

    // The buffer aliases `backing`, which stays alive until this function
    // returns, i.e. after the mem object has been released.
    const cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE;
    cl_mem memObj = clCreateBuffer(ctx, flags, kBytes, backing.data(), &status);
    if(status != CL_SUCCESS) {
        std::cerr << " clCreateBuffer(CL_MEM_USE_HOST_PTR) failed: "
                  << clErrorToStr(status) << "\n";
        return false;
    }

    // Default (in-order, non-profiling) queue; the list is zero-terminated.
    cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, 0, 0};
    cl_command_queue queue = clCreateCommandQueueWithProperties(ctx, dev, props, &status);
    if(status != CL_SUCCESS) {
        std::cerr << " Failed to create command queue: "
                  << clErrorToStr(status) << "\n";
        clReleaseMemObject(memObj);
        return false;
    }

    // A trivial blocking write exercises the buffer end to end.
    status = clEnqueueWriteBuffer(queue, memObj, CL_TRUE, 0, kBytes,
                                  backing.data(), 0, nullptr, nullptr);
    clFinish(queue);

    if(status != CL_SUCCESS) {
        std::cerr << " clEnqueueWriteBuffer failed: " << clErrorToStr(status) << "\n";
    }

    clReleaseCommandQueue(queue);
    clReleaseMemObject(memObj);
    return status == CL_SUCCESS;
}
int main()
{
cl_uint numPlatforms = 0;
clGetPlatformIDs(0, nullptr, &numPlatforms);
if(numPlatforms == 0){
std::cout << "No OpenCL platforms.\n";
return 0;
}
std::vector<cl_platform_id> plats(numPlatforms);
clGetPlatformIDs(numPlatforms, plats.data(), nullptr);
for(cl_uint p = 0; p < numPlatforms; ++p)
{
char buf[256];
clGetPlatformInfo(plats[p], CL_PLATFORM_NAME, sizeof(buf), buf, nullptr);
std::cout << "Platform: " << buf << "\n";
cl_uint numDevs = 0;
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevs);
if(numDevs == 0) {
std::cout << " No devices found on this platform.\n";
continue;
}
std::vector<cl_device_id> devs(numDevs);
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, numDevs, devs.data(), nullptr);
for(cl_uint d = 0; d < numDevs; ++d)
{
clGetDeviceInfo(devs[d], CL_DEVICE_NAME, sizeof(buf), buf, nullptr);
std::cout << " Device: " << buf << "\n";
// Create a context for this device
cl_int err;
cl_context ctx = clCreateContext(nullptr, 1, &devs[d], nullptr, nullptr, &err);
if(err != CL_SUCCESS) {
std::cout << " Failed to create context: "
<< clErrorToStr(err) << "\n";
continue;
}
bool supported = testUseHostPtr(ctx, devs[d]);
if(supported)
std::cout << " HOST_PTR appears supported.\n";
else
std::cout << " HOST_PTR appears NOT supported.\n";
clReleaseContext(ctx);
}
}
return 0;
}
+8 -4
View File
@@ -1,8 +1,10 @@
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <cstring>
#include <cstdlib>
void checkCLError(cl_int err, const char* msg) {
if (err != CL_SUCCESS) {
@@ -64,7 +66,8 @@ int main() {
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
checkCLError(err, "create buffer");
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
checkCLError(err, "create queue");
// Simple host → device → host round-trip test
@@ -72,10 +75,11 @@ int main() {
auto start = std::chrono::high_resolution_clock::now();
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
checkCLError(err, "map buffer");
clWaitForEvents(1, &evt);
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
checkCLError(err, "map buffer");
clWaitForEvents(1, &evt);
clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr);
clReleaseMemObject(buf);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> elapsed = end - start;
+4 -1
View File
@@ -1,8 +1,10 @@
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <cstring>
#include <cstdlib>
void checkCLError(cl_int err, const char* msg) {
if (err != CL_SUCCESS) {
@@ -70,7 +72,8 @@ int main() {
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
checkCLError(err, "create context");
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
checkCLError(err, "create queue");
// --------------------
+5 -3
View File
@@ -1,3 +1,4 @@
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
#include <iostream>
#include <vector>
@@ -32,8 +33,9 @@ int main() {
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
CHECK(err, "clCreateContext");
cl_command_queue q = clCreateCommandQueue(ctx, dev, 0, &err);
CHECK(err, "clCreateCommandQueue");
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
CHECK(err, "clCreateCommandQueueWithProperties");
// Create program and kernel
const size_t srcLen = std::strlen(kernelSrc);
@@ -97,7 +99,7 @@ int main() {
// Validate
bool ok = true;
for (size_t i = 0; i < N; ++i)
if (outPtr[i] != 142 + i) ok = false;
if (outPtr[i] != static_cast<int>(142 + i)) ok = false;
std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
: "❌ GPU did not see host writes (copying or staging occurred)\n");