Add some compile-time CL utilities

This commit is contained in:
2025-10-25 03:39:42 -04:00
parent 266cabcddb
commit 0e872042ee
5 changed files with 406 additions and 0 deletions
+90
View File
@@ -0,0 +1,90 @@
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <cstring>
void checkCLError(cl_int err, const char* msg) {
if (err != CL_SUCCESS) {
std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
exit(1);
}
}
int main() {
cl_uint numPlatforms = 0;
checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
std::vector<cl_platform_id> platforms(numPlatforms);
checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
for (cl_uint p = 0; p < numPlatforms; ++p) {
char platformName[256];
clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
std::cout << "Platform " << p << ": " << platformName << "\n";
cl_uint numDevices = 0;
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
std::vector<cl_device_id> devices(numDevices);
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
for (cl_uint d = 0; d < numDevices; ++d) {
char deviceName[256];
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
std::cout << " Device " << d << ": " << deviceName << "\n";
cl_bool unifiedMem = CL_FALSE;
clGetDeviceInfo(devices[d], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unifiedMem), &unifiedMem, nullptr);
std::cout << " Host-Device unified memory: " << (unifiedMem ? "Yes" : "No") << "\n";
#ifdef CL_DEVICE_SVM_CAPABILITIES
cl_device_svm_capabilities svmCaps = 0;
clGetDeviceInfo(devices[d], CL_DEVICE_SVM_CAPABILITIES, sizeof(svmCaps), &svmCaps, nullptr);
std::cout << " SVM capabilities:\n";
if (!svmCaps) std::cout << " None\n";
if (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER)
std::cout << " - Coarse-grain buffer sharing\n";
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)
std::cout << " - Fine-grain buffer sharing\n";
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)
std::cout << " - Fine-grain system sharing\n";
if (svmCaps & CL_DEVICE_SVM_ATOMICS)
std::cout << " - Atomics supported\n";
#endif
// Optional runtime test: check if CL_MEM_USE_HOST_PTR buffer reuses pointer
const size_t bufSize = 1024 * 1024;
std::vector<char> hostBuffer(bufSize, 42);
cl_int err;
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
checkCLError(err, "create context");
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
checkCLError(err, "create buffer");
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
checkCLError(err, "create queue");
// Simple host → device → host round-trip test
cl_event evt;
auto start = std::chrono::high_resolution_clock::now();
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
checkCLError(err, "map buffer");
clWaitForEvents(1, &evt);
clReleaseMemObject(buf);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> elapsed = end - start;
std::cout << " Map latency: " << elapsed.count() << " ms (lower → likely zero-copy)\n";
clReleaseCommandQueue(q);
clReleaseContext(ctx);
}
std::cout << std::endl;
}
return 0;
}