Rename buildmach stuff to buildmach/
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/cl.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#define CHECK(err, msg) \
|
||||
if (err != CL_SUCCESS) { \
|
||||
std::cerr << "ERROR: " << msg << " (" << err << ")\n"; \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
const char *kernelSrc = R"CLC(
|
||||
__kernel void check_shared(__global const int* in, __global int* out) {
|
||||
int gid = get_global_id(0);
|
||||
out[gid] = in[gid] + 42; // simple deterministic transform
|
||||
}
|
||||
)CLC";
|
||||
|
||||
int main() {
|
||||
cl_int err;
|
||||
|
||||
// Pick first available device
|
||||
cl_uint numPlatforms;
|
||||
CHECK(clGetPlatformIDs(0, nullptr, &numPlatforms), "clGetPlatformIDs count");
|
||||
std::vector<cl_platform_id> plats(numPlatforms);
|
||||
CHECK(clGetPlatformIDs(numPlatforms, plats.data(), nullptr), "clGetPlatformIDs");
|
||||
|
||||
cl_platform_id plat = plats[0];
|
||||
cl_device_id dev;
|
||||
CHECK(clGetDeviceIDs(plat, CL_DEVICE_TYPE_GPU, 1, &dev, nullptr), "clGetDeviceIDs");
|
||||
|
||||
cl_context ctx = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &err);
|
||||
CHECK(err, "clCreateContext");
|
||||
|
||||
cl_queue_properties queueProps[] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
cl_command_queue q = clCreateCommandQueueWithProperties(ctx, dev, queueProps, &err);
|
||||
CHECK(err, "clCreateCommandQueueWithProperties");
|
||||
|
||||
// Create program and kernel
|
||||
const size_t srcLen = std::strlen(kernelSrc);
|
||||
cl_program prog = clCreateProgramWithSource(ctx, 1, &kernelSrc, &srcLen, &err);
|
||||
CHECK(err, "clCreateProgramWithSource");
|
||||
|
||||
err = clBuildProgram(prog, 1, &dev, nullptr, nullptr, nullptr);
|
||||
if (err != CL_SUCCESS) {
|
||||
size_t logSize;
|
||||
clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
|
||||
std::vector<char> log(logSize);
|
||||
clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, logSize, log.data(), nullptr);
|
||||
std::cerr << "--- Build Log ---\n" << log.data() << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
cl_kernel krn = clCreateKernel(prog, "check_shared", &err);
|
||||
CHECK(err, "clCreateKernel");
|
||||
|
||||
const size_t N = 8;
|
||||
size_t bufSize = N * sizeof(int);
|
||||
|
||||
// Allocate host-visible buffer
|
||||
cl_mem bufIn = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
|
||||
CHECK(err, "clCreateBuffer input");
|
||||
cl_mem bufOut = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, bufSize, nullptr, &err);
|
||||
CHECK(err, "clCreateBuffer output");
|
||||
|
||||
// Map the buffer (should return pointer to real host memory if unified)
|
||||
int* hostPtr = (int*)clEnqueueMapBuffer(q, bufIn, CL_TRUE, CL_MAP_WRITE, 0, bufSize, 0, nullptr, nullptr, &err);
|
||||
CHECK(err, "clEnqueueMapBuffer");
|
||||
|
||||
std::cout << "Mapped host pointer: " << static_cast<void*>(hostPtr) << "\n";
|
||||
|
||||
// Write pattern directly into mapped memory
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
hostPtr[i] = 100 + i;
|
||||
|
||||
// No clEnqueueWriteBuffer call! We rely on shared memory.
|
||||
clEnqueueUnmapMemObject(q, bufIn, hostPtr, 0, nullptr, nullptr);
|
||||
clFinish(q);
|
||||
|
||||
// Set kernel args
|
||||
clSetKernelArg(krn, 0, sizeof(cl_mem), &bufIn);
|
||||
clSetKernelArg(krn, 1, sizeof(cl_mem), &bufOut);
|
||||
|
||||
size_t global = N;
|
||||
err = clEnqueueNDRangeKernel(q, krn, 1, nullptr, &global, nullptr, 0, nullptr, nullptr);
|
||||
CHECK(err, "clEnqueueNDRangeKernel");
|
||||
clFinish(q);
|
||||
|
||||
// Read back result
|
||||
int* outPtr = (int*)clEnqueueMapBuffer(q, bufOut, CL_TRUE, CL_MAP_READ, 0, bufSize, 0, nullptr, nullptr, &err);
|
||||
CHECK(err, "map output");
|
||||
|
||||
std::cout << "Result: ";
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
std::cout << outPtr[i] << " ";
|
||||
std::cout << "\n";
|
||||
|
||||
// Validate
|
||||
bool ok = true;
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
if (outPtr[i] != static_cast<int>(142 + i)) ok = false;
|
||||
|
||||
std::cout << (ok ? "✅ GPU saw host writes (zero-copy confirmed)\n"
|
||||
: "❌ GPU did not see host writes (copying or staging occurred)\n");
|
||||
|
||||
clEnqueueUnmapMemObject(q, bufOut, outPtr, 0, nullptr, nullptr);
|
||||
clFinish(q);
|
||||
|
||||
clReleaseMemObject(bufIn);
|
||||
clReleaseMemObject(bufOut);
|
||||
clReleaseKernel(krn);
|
||||
clReleaseProgram(prog);
|
||||
clReleaseCommandQueue(q);
|
||||
clReleaseContext(ctx);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user