91 lines
3.8 KiB
C++
91 lines
3.8 KiB
C++
|
|
#include <CL/cl.h>
|
||
|
|
#include <iostream>
|
||
|
|
#include <vector>
|
||
|
|
#include <chrono>
|
||
|
|
#include <cstring>
|
||
|
|
|
||
|
|
// Guard for OpenCL status codes: returns silently on CL_SUCCESS; otherwise
// writes the numeric code and the caller-supplied label `msg` to stderr and
// terminates the process with exit status 1.
void checkCLError(cl_int err, const char* msg) {
    if (err == CL_SUCCESS) {
        return;
    }

    std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl;
    exit(1);
}
|
||
|
|
|
||
|
|
int main() {
|
||
|
|
cl_uint numPlatforms = 0;
|
||
|
|
checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms");
|
||
|
|
std::vector<cl_platform_id> platforms(numPlatforms);
|
||
|
|
checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms");
|
||
|
|
|
||
|
|
std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n";
|
||
|
|
|
||
|
|
for (cl_uint p = 0; p < numPlatforms; ++p) {
|
||
|
|
char platformName[256];
|
||
|
|
clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr);
|
||
|
|
std::cout << "Platform " << p << ": " << platformName << "\n";
|
||
|
|
|
||
|
|
cl_uint numDevices = 0;
|
||
|
|
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
|
||
|
|
std::vector<cl_device_id> devices(numDevices);
|
||
|
|
clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr);
|
||
|
|
|
||
|
|
for (cl_uint d = 0; d < numDevices; ++d) {
|
||
|
|
char deviceName[256];
|
||
|
|
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
|
||
|
|
std::cout << " Device " << d << ": " << deviceName << "\n";
|
||
|
|
|
||
|
|
cl_bool unifiedMem = CL_FALSE;
|
||
|
|
clGetDeviceInfo(devices[d], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unifiedMem), &unifiedMem, nullptr);
|
||
|
|
std::cout << " Host-Device unified memory: " << (unifiedMem ? "Yes" : "No") << "\n";
|
||
|
|
|
||
|
|
#ifdef CL_DEVICE_SVM_CAPABILITIES
|
||
|
|
cl_device_svm_capabilities svmCaps = 0;
|
||
|
|
clGetDeviceInfo(devices[d], CL_DEVICE_SVM_CAPABILITIES, sizeof(svmCaps), &svmCaps, nullptr);
|
||
|
|
std::cout << " SVM capabilities:\n";
|
||
|
|
if (!svmCaps) std::cout << " None\n";
|
||
|
|
if (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER)
|
||
|
|
std::cout << " - Coarse-grain buffer sharing\n";
|
||
|
|
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)
|
||
|
|
std::cout << " - Fine-grain buffer sharing\n";
|
||
|
|
if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)
|
||
|
|
std::cout << " - Fine-grain system sharing\n";
|
||
|
|
if (svmCaps & CL_DEVICE_SVM_ATOMICS)
|
||
|
|
std::cout << " - Atomics supported\n";
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// Optional runtime test: check if CL_MEM_USE_HOST_PTR buffer reuses pointer
|
||
|
|
const size_t bufSize = 1024 * 1024;
|
||
|
|
std::vector<char> hostBuffer(bufSize, 42);
|
||
|
|
|
||
|
|
cl_int err;
|
||
|
|
cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err);
|
||
|
|
checkCLError(err, "create context");
|
||
|
|
|
||
|
|
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err);
|
||
|
|
checkCLError(err, "create buffer");
|
||
|
|
|
||
|
|
cl_command_queue q = clCreateCommandQueue(ctx, devices[d], 0, &err);
|
||
|
|
checkCLError(err, "create queue");
|
||
|
|
|
||
|
|
// Simple host → device → host round-trip test
|
||
|
|
cl_event evt;
|
||
|
|
|
||
|
|
auto start = std::chrono::high_resolution_clock::now();
|
||
|
|
|
||
|
|
void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err);
|
||
|
|
checkCLError(err, "map buffer");
|
||
|
|
clWaitForEvents(1, &evt);
|
||
|
|
|
||
|
|
clReleaseMemObject(buf);
|
||
|
|
auto end = std::chrono::high_resolution_clock::now();
|
||
|
|
std::chrono::duration<double, std::milli> elapsed = end - start;
|
||
|
|
std::cout << " Map latency: " << elapsed.count() << " ms (lower → likely zero-copy)\n";
|
||
|
|
|
||
|
|
clReleaseCommandQueue(q);
|
||
|
|
clReleaseContext(ctx);
|
||
|
|
}
|
||
|
|
std::cout << std::endl;
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|