CLTests: Add test for USE_HOST_PTR; fix build warnings
This commit is contained in:
+41
-8
@@ -1,19 +1,52 @@
|
|||||||
if(COMPILE_CL_CHECKS)
|
option(COMPILE_CL_CHECKS "Compile CL checks" OFF)
|
||||||
find_package(OpenCL REQUIRED)
|
|
||||||
|
|
||||||
|
if(COMPILE_CL_CHECKS)
|
||||||
|
# Find OpenCL: try find_package first, fall back to pkg-config
|
||||||
|
find_package(OpenCL QUIET)
|
||||||
|
if(OpenCL_FOUND)
|
||||||
|
# Normalize find_package variables to match pkg_check_modules naming
|
||||||
|
set(OPENCL_FOUND TRUE)
|
||||||
|
set(OPENCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS})
|
||||||
|
# Handle both OpenCL_LIBRARY (singular) and OpenCL_LIBRARIES (plural)
|
||||||
|
if(OpenCL_LIBRARIES)
|
||||||
|
set(OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
|
||||||
|
else()
|
||||||
|
set(OPENCL_LIBRARIES ${OpenCL_LIBRARY})
|
||||||
|
endif()
|
||||||
|
set(OPENCL_LIBRARY_DIRS "")
|
||||||
|
message(STATUS "Found OpenCL using find_package")
|
||||||
|
else()
|
||||||
|
# Fall back to pkg-config
|
||||||
|
pkg_check_modules(OPENCL OpenCL)
|
||||||
|
if(NOT OPENCL_FOUND)
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"Failed to find OpenCL: both find_package and "
|
||||||
|
"pkg_check_modules failed. Try installing the "
|
||||||
|
"'ocl-icd-opencl-dev' package (or the appropriate "
|
||||||
|
"OpenCL development package for your system)."
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
message(STATUS "Found OpenCL using pkg-config")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_executable(clhostshmemptrcheck clhostshmemptrcheck.cpp)
|
||||||
|
target_include_directories(clhostshmemptrcheck
|
||||||
|
PUBLIC ${OPENCL_INCLUDE_DIRS})
|
||||||
|
target_link_libraries(clhostshmemptrcheck
|
||||||
|
${OPENCL_LIBRARIES})
|
||||||
add_executable(clshmemlatency clshmemlatency.cpp)
|
add_executable(clshmemlatency clshmemlatency.cpp)
|
||||||
target_include_directories(clshmemlatency
|
target_include_directories(clshmemlatency
|
||||||
PUBLIC ${OpenCL_INCLUDE_DIRS})
|
PUBLIC ${OPENCL_INCLUDE_DIRS})
|
||||||
target_link_libraries(clshmemlatency
|
target_link_libraries(clshmemlatency
|
||||||
${OpenCL_LIBRARY})
|
${OPENCL_LIBRARIES})
|
||||||
add_executable(clshmemcheck clshmemcheck.cpp)
|
add_executable(clshmemcheck clshmemcheck.cpp)
|
||||||
target_include_directories(clshmemcheck
|
target_include_directories(clshmemcheck
|
||||||
PUBLIC ${OpenCL_INCLUDE_DIRS})
|
PUBLIC ${OPENCL_INCLUDE_DIRS})
|
||||||
target_link_libraries(clshmemcheck
|
target_link_libraries(clshmemcheck
|
||||||
${OpenCL_LIBRARY})
|
${OPENCL_LIBRARIES})
|
||||||
add_executable(clzerocopycheck clzerocopycheck.cpp)
|
add_executable(clzerocopycheck clzerocopycheck.cpp)
|
||||||
target_include_directories(clzerocopycheck
|
target_include_directories(clzerocopycheck
|
||||||
PUBLIC ${OpenCL_INCLUDE_DIRS})
|
PUBLIC ${OPENCL_INCLUDE_DIRS})
|
||||||
target_link_libraries(clzerocopycheck
|
target_link_libraries(clzerocopycheck
|
||||||
${OpenCL_LIBRARY})
|
${OPENCL_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -0,0 +1,125 @@
|
|||||||
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
/// Translate an OpenCL status code into its symbolic name for diagnostics.
///
/// The table covers the status codes most likely to be reported by the
/// OpenCL calls made in this program (platform/device enumeration, context,
/// buffer and command-queue creation, and buffer writes). Any code that is
/// not listed falls through to "UNKNOWN_ERROR".
///
/// @param err  Status code returned by an OpenCL API call.
/// @return     Pointer to a static, NUL-terminated string; never null.
static const char* clErrorToStr(cl_int err)
{
    switch (err) {
        case CL_SUCCESS:                       return "CL_SUCCESS";

        // Runtime / resource failures
        case CL_DEVICE_NOT_FOUND:              return "CL_DEVICE_NOT_FOUND";
        case CL_DEVICE_NOT_AVAILABLE:          return "CL_DEVICE_NOT_AVAILABLE";
        case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
        case CL_OUT_OF_RESOURCES:              return "CL_OUT_OF_RESOURCES";
        case CL_OUT_OF_HOST_MEMORY:            return "CL_OUT_OF_HOST_MEMORY";
        case CL_MISALIGNED_SUB_BUFFER_OFFSET:  return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
            return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";

        // Invalid-argument failures
        case CL_INVALID_VALUE:                 return "CL_INVALID_VALUE";
        case CL_INVALID_DEVICE_TYPE:           return "CL_INVALID_DEVICE_TYPE";
        case CL_INVALID_PLATFORM:              return "CL_INVALID_PLATFORM";
        case CL_INVALID_DEVICE:                return "CL_INVALID_DEVICE";
        case CL_INVALID_CONTEXT:               return "CL_INVALID_CONTEXT";
        case CL_INVALID_QUEUE_PROPERTIES:      return "CL_INVALID_QUEUE_PROPERTIES";
        case CL_INVALID_COMMAND_QUEUE:         return "CL_INVALID_COMMAND_QUEUE";
        case CL_INVALID_HOST_PTR:              return "CL_INVALID_HOST_PTR";
        case CL_INVALID_MEM_OBJECT:            return "CL_INVALID_MEM_OBJECT";
        case CL_INVALID_EVENT_WAIT_LIST:       return "CL_INVALID_EVENT_WAIT_LIST";
        case CL_INVALID_OPERATION:             return "CL_INVALID_OPERATION";
        case CL_INVALID_BUFFER_SIZE:           return "CL_INVALID_BUFFER_SIZE";
        case CL_INVALID_PROPERTY:              return "CL_INVALID_PROPERTY";

        default:                               return "UNKNOWN_ERROR";
    }
}
|
||||||
|
|
||||||
|
// Try creating a USE_HOST_PTR buffer on this device
|
||||||
|
bool testUseHostPtr(cl_context ctx, cl_device_id dev)
|
||||||
|
{
|
||||||
|
const size_t bufSize = 1024;
|
||||||
|
std::vector<char> host(bufSize, 0);
|
||||||
|
|
||||||
|
cl_int err = 0;
|
||||||
|
cl_mem buf = clCreateBuffer(
|
||||||
|
ctx,
|
||||||
|
CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
|
||||||
|
bufSize,
|
||||||
|
host.data(),
|
||||||
|
&err
|
||||||
|
);
|
||||||
|
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << " clCreateBuffer(CL_MEM_USE_HOST_PTR) failed: "
|
||||||
|
<< clErrorToStr(err) << "\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to enqueue a trivial write to verify it works
|
||||||
|
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
|
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
|
||||||
|
if(err != CL_SUCCESS){
|
||||||
|
std::cerr << " Failed to create command queue: "
|
||||||
|
<< clErrorToStr(err) << "\n";
|
||||||
|
clReleaseMemObject(buf);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clEnqueueWriteBuffer(q, buf, CL_TRUE, 0, bufSize, host.data(), 0, nullptr, nullptr);
|
||||||
|
clFinish(q);
|
||||||
|
|
||||||
|
bool ok = (err == CL_SUCCESS);
|
||||||
|
|
||||||
|
if(!ok) {
|
||||||
|
std::cerr << " clEnqueueWriteBuffer failed: " << clErrorToStr(err) << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseCommandQueue(q);
|
||||||
|
clReleaseMemObject(buf);
|
||||||
|
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
cl_uint numPlatforms = 0;
|
||||||
|
clGetPlatformIDs(0, nullptr, &numPlatforms);
|
||||||
|
|
||||||
|
if(numPlatforms == 0){
|
||||||
|
std::cout << "No OpenCL platforms.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<cl_platform_id> plats(numPlatforms);
|
||||||
|
clGetPlatformIDs(numPlatforms, plats.data(), nullptr);
|
||||||
|
|
||||||
|
for(cl_uint p = 0; p < numPlatforms; ++p)
|
||||||
|
{
|
||||||
|
char buf[256];
|
||||||
|
|
||||||
|
clGetPlatformInfo(plats[p], CL_PLATFORM_NAME, sizeof(buf), buf, nullptr);
|
||||||
|
std::cout << "Platform: " << buf << "\n";
|
||||||
|
|
||||||
|
cl_uint numDevs = 0;
|
||||||
|
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevs);
|
||||||
|
|
||||||
|
if(numDevs == 0) {
|
||||||
|
std::cout << " No devices found on this platform.\n";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<cl_device_id> devs(numDevs);
|
||||||
|
clGetDeviceIDs(plats[p], CL_DEVICE_TYPE_ALL, numDevs, devs.data(), nullptr);
|
||||||
|
|
||||||
|
for(cl_uint d = 0; d < numDevs; ++d)
|
||||||
|
{
|
||||||
|
clGetDeviceInfo(devs[d], CL_DEVICE_NAME, sizeof(buf), buf, nullptr);
|
||||||
|
std::cout << " Device: " << buf << "\n";
|
||||||
|
|
||||||
|
// Create a context for this device
|
||||||
|
cl_int err;
|
||||||
|
cl_context ctx = clCreateContext(nullptr, 1, &devs[d], nullptr, nullptr, &err);
|
||||||
|
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cout << " Failed to create context: "
|
||||||
|
<< clErrorToStr(err) << "\n";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool supported = testUseHostPtr(ctx, devs[d]);
|
||||||
|
|
||||||
|
if(supported)
|
||||||
|
std::cout << " HOST_PTR appears supported.\n";
|
||||||
|
else
|
||||||
|
std::cout << " HOST_PTR appears NOT supported.\n";
|
||||||
|
|
||||||
|
clReleaseContext(ctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
void checkCLError(cl_int err, const char* msg) {
|
void checkCLError(cl_int err, const char* msg) {
|
||||||
if (err != CL_SUCCESS) {
|
if (err != CL_SUCCESS) {
|
||||||
@@ -64,7 +66,8 @@ int main() {
|
|||||||
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
|
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
|
||||||
checkCLError(err, "create buffer");
|
checkCLError(err, "create buffer");
|
||||||
|
|
||||||
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
|
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
|
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
|
||||||
checkCLError(err, "create queue");
|
checkCLError(err, "create queue");
|
||||||
|
|
||||||
// Simple host → device → host round-trip test
|
// Simple host → device → host round-trip test
|
||||||
@@ -72,10 +75,11 @@ int main() {
|
|||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
|
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
|
||||||
checkCLError(err, "map buffer");
|
checkCLError(err, "map buffer");
|
||||||
clWaitForEvents(1, &evt);
|
clWaitForEvents(1, &evt);
|
||||||
|
|
||||||
|
clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr);
|
||||||
clReleaseMemObject(buf);
|
clReleaseMemObject(buf);
|
||||||
auto end = std::chrono::high_resolution_clock::now();
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
std::chrono::duration<double, std::milli> elapsed = end - start;
|
std::chrono::duration<double, std::milli> elapsed = end - start;
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
void checkCLError(cl_int err, const char* msg) {
|
void checkCLError(cl_int err, const char* msg) {
|
||||||
if (err != CL_SUCCESS) {
|
if (err != CL_SUCCESS) {
|
||||||
@@ -70,7 +72,8 @@ int main() {
|
|||||||
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
||||||
checkCLError(err, "create context");
|
checkCLError(err, "create context");
|
||||||
|
|
||||||
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
|
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
|
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err);
|
||||||
checkCLError(err, "create queue");
|
checkCLError(err, "create queue");
|
||||||
|
|
||||||
// --------------------
|
// --------------------
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
#define CL_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -32,8 +33,9 @@ int main() {
|
|||||||
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
|
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
|
||||||
CHECK(err, "clCreateContext");
|
CHECK(err, "clCreateContext");
|
||||||
|
|
||||||
cl_command_queue q = clCreateCommandQueue(ctx, dev, 0, &err);
|
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
CHECK(err, "clCreateCommandQueue");
|
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
|
||||||
|
CHECK(err, "clCreateCommandQueueWithProperties");
|
||||||
|
|
||||||
// Create program and kernel
|
// Create program and kernel
|
||||||
const size_t srcLen = std::strlen(kernelSrc);
|
const size_t srcLen = std::strlen(kernelSrc);
|
||||||
@@ -97,7 +99,7 @@ int main() {
|
|||||||
// Validate
|
// Validate
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
for (size_t i = 0; i < N; ++i)
|
for (size_t i = 0; i < N; ++i)
|
||||||
if (outPtr[i] != 142 + i) ok = false;
|
if (outPtr[i] != static_cast<int>(142 + i)) ok = false;
|
||||||
|
|
||||||
std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
|
std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
|
||||||
: "❌ GPU did not see host writes (copying or staging occurred)\n");
|
: "❌ GPU did not see host writes (copying or staging occurred)\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user