#define CL_TARGET_OPENCL_VERSION 300 #include #include #include #include #include #include void checkCLError(cl_int err, const char* msg) { if (err != CL_SUCCESS) { std::cerr << "OpenCL Error " << err << " at: " << msg << std::endl; exit(1); } } int main() { cl_uint numPlatforms = 0; checkCLError(clGetPlatformIDs(0, nullptr, &numPlatforms), "get num platforms"); std::vector platforms(numPlatforms); checkCLError(clGetPlatformIDs(numPlatforms, platforms.data(), nullptr), "get platforms"); std::cout << "Found " << numPlatforms << " OpenCL platforms\n\n"; for (cl_uint p = 0; p < numPlatforms; ++p) { char platformName[256]; clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, sizeof(platformName), platformName, nullptr); std::cout << "Platform " << p << ": " << platformName << "\n"; cl_uint numDevices = 0; clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices); std::vector devices(numDevices); clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, numDevices, devices.data(), nullptr); for (cl_uint d = 0; d < numDevices; ++d) { char deviceName[256]; clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr); std::cout << " Device " << d << ": " << deviceName << "\n"; cl_bool unifiedMem = CL_FALSE; clGetDeviceInfo(devices[d], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unifiedMem), &unifiedMem, nullptr); std::cout << " Host-Device unified memory: " << (unifiedMem ? "Yes" : "No") << "\n"; #ifdef CL_DEVICE_SVM_CAPABILITIES cl_device_svm_capabilities svmCaps = 0; clGetDeviceInfo(devices[d], CL_DEVICE_SVM_CAPABILITIES, sizeof(svmCaps), &svmCaps, nullptr); std::cout << " SVM capabilities:\n"; if (!svmCaps) std::cout << " None\n"; if (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) std::cout << " - Coarse-grain buffer sharing\n"; if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) std::cout << " - Fine-grain buffer sharing\n"; if (svmCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) std::cout << " - Fine-grain system sharing\n"; if (svmCaps & CL_DEVICE_SVM_ATOMICS) std::cout << " - Atomics supported\n"; #endif // Optional runtime test: check if CL_MEM_USE_HOST_PTR buffer reuses pointer const size_t bufSize = 1024 * 1024; std::vector hostBuffer(bufSize, 42); cl_int err; cl_context ctx = clCreateContext(nullptr, 1, &devices[d], nullptr, nullptr, &err); checkCLError(err, "create context"); cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, bufSize, hostBuffer.data(), &err); checkCLError(err, "create buffer"); cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0}; cl_command_queue q = clCreateCommandQueueWithProperties(ctx, devices[d], queueProps, &err); checkCLError(err, "create queue"); // Simple host → device → host round-trip test cl_event evt; auto start = std::chrono::high_resolution_clock::now(); void* mapped = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, &evt, &err); checkCLError(err, "map buffer"); clWaitForEvents(1, &evt); clEnqueueUnmapMemObject(q, buf, mapped, 0, nullptr, nullptr); clReleaseMemObject(buf); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; std::cout << " Map latency: " << elapsed.count() << " ms (lower → likely zero-copy)\n"; clReleaseCommandQueue(q); clReleaseContext(ctx); } std::cout << std::endl; } return 0; }