Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions platforms/artic/runtime.impala
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
#[import(cc = "C", name = "anydsl_device_name")] fn runtime_device_name(_device: i32) -> &[u8];
#[import(cc = "C", name = "anydsl_device_check_feature_support")] fn runtime_device_check_feature_support(_device: i32, _feature: &[u8]) -> bool;

#[import(cc = "C", name = "anydsl_alloc")] fn runtime_alloc(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_alloc_host")] fn runtime_alloc_host(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_alloc_unified")] fn runtime_alloc_unified(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_copy")] fn runtime_copy(_src_device: i32, _src_ptr: &[i8], _src_offset: i64, _dst_device: i32, _dst_ptr: &mut [i8], _dst_offset: i64, _size: i64) -> ();
#[import(cc = "C", name = "anydsl_get_device_ptr")] fn runtime_get_device_ptr(_device: i32, _ptr: &[i8]) -> &[i8];
#[import(cc = "C", name = "anydsl_synchronize")] fn runtime_synchronize(_device: i32) -> ();
#[import(cc = "C", name = "anydsl_release")] fn runtime_release(_device: i32, _ptr: &[i8]) -> ();
#[import(cc = "C", name = "anydsl_release_host")] fn runtime_release_host(_device: i32, _ptr: &[i8]) -> ();
#[import(cc = "C", name = "anydsl_alloc")] fn runtime_alloc(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_alloc_host")] fn runtime_alloc_host(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_alloc_unified")] fn runtime_alloc_unified(_device: i32, _size: i64) -> &mut [i8];
#[import(cc = "C", name = "anydsl_copy")] fn runtime_copy(_src_device: i32, _src_ptr: &[i8], _src_offset: i64, _dst_device: i32, _dst_ptr: &mut [i8], _dst_offset: i64, _size: i64) -> ();
#[import(cc = "C", name = "anydsl_get_device_ptr")] fn runtime_get_device_ptr(_device: i32, _ptr: &[i8]) -> &[i8];
#[import(cc = "C", name = "anydsl_synchronize")] fn runtime_synchronize(_device: i32) -> ();
#[import(cc = "C", name = "anydsl_release")] fn runtime_release(_device: i32, _ptr: &[i8]) -> ();
#[import(cc = "C", name = "anydsl_release_host")] fn runtime_release_host(_device: i32, _ptr: &[i8]) -> ();
// Binds the C runtime symbol `anydsl_map_buffer_svm`: maps a coarse-grained SVM buffer for host access.
// `_device` is the device id handed to the runtime, `_ptr` the buffer and `_size` the size of the mapped region.
#[import(cc = "C", name = "anydsl_map_buffer_svm")] fn runtime_anydsl_map_buffer_svm(_device: i32, _ptr: &mut [i8], _size: i64) -> ();
// Binds the C runtime symbol `anydsl_unmap_buffer_svm`: unmaps a coarse-grained SVM buffer for device access.
#[import(cc = "C", name = "anydsl_unmap_buffer_svm")] fn runtime_anydsl_unmap_buffer_svm(_device: i32, _ptr: &mut [i8]) -> ();

#[import(cc = "C", name = "anydsl_random_seed")] fn random_seed(_: u32) -> ();
#[import(cc = "C", name = "anydsl_random_val_f32")] fn random_val_f32() -> f32;
Expand Down
8 changes: 8 additions & 0 deletions src/anydsl_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ void anydsl_release_host(int32_t mask, void* ptr) {
runtime().release_host(to_platform(mask), to_device(mask), ptr);
}

// Exported C entry point: maps a coarse-grained SVM buffer for host access.
// `mask` is decoded with to_platform()/to_device() and the request is
// forwarded, together with `ptr` and `size`, to the object returned by runtime().
void anydsl_map_buffer_svm(int32_t mask, void* ptr, int64_t size) {
    auto&& rt = runtime();
    rt.map_buffer_svm(to_platform(mask), to_device(mask), ptr, size);
}

// Exported C entry point: unmaps a coarse-grained SVM buffer for device access.
// `mask` is decoded with to_platform()/to_device() and the request is
// forwarded, together with `ptr`, to the object returned by runtime().
void anydsl_unmap_buffer_svm(int32_t mask, void* ptr) {
    auto&& rt = runtime();
    rt.unmap_buffer_svm(to_platform(mask), to_device(mask), ptr);
}

void anydsl_copy(
int32_t mask_src, const void* src, int64_t offset_src,
int32_t mask_dst, void* dst, int64_t offset_dst, int64_t size) {
Expand Down
3 changes: 3 additions & 0 deletions src/anydsl_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ AnyDSL_runtime_API void* anydsl_get_device_ptr(int32_t, void*);
AnyDSL_runtime_API void anydsl_release(int32_t, void*);
AnyDSL_runtime_API void anydsl_release_host(int32_t, void*);

/// Maps a coarse-grained SVM buffer for host access.
/// Arguments: device mask (decoded into platform and device by the runtime), buffer pointer, size of the region to map.
AnyDSL_runtime_API void anydsl_map_buffer_svm(int32_t, void*, int64_t size);
/// Unmaps a coarse-grained SVM buffer for device access.
/// Arguments: device mask (decoded into platform and device by the runtime), buffer pointer.
AnyDSL_runtime_API void anydsl_unmap_buffer_svm(int32_t, void*);

AnyDSL_runtime_API void anydsl_copy(int32_t, const void*, int64_t, int32_t, void*, int64_t, int64_t);

AnyDSL_runtime_API void anydsl_launch_kernel(
Expand Down
73 changes: 59 additions & 14 deletions src/opencl_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,9 @@ OpenCLPlatform::OpenCLPlatform(Runtime* runtime)
devices_.emplace_back(this, platform, device, version_major, version_minor, platform_name, device_name);

#ifdef CL_VERSION_2_0
devices_[dev].use_svm = svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
if (getenv("ANYDSL_CL_DISABLE_SVM"))
devices_[dev].use_svm = false;
devices_[dev].svm_caps = svm_caps;
#endif

Expand Down Expand Up @@ -297,7 +300,7 @@ void* OpenCLPlatform::alloc(DeviceId dev, int64_t size) {
if (!size) return nullptr;

#ifdef CL_VERSION_2_0
if (devices_[dev].version_major == 2) {
if (devices_[dev].use_svm) {
cl_mem_flags flags = CL_MEM_READ_WRITE;
void* mem = clSVMAlloc(devices_[dev].ctx, flags, size, 0);
if (mem == nullptr)
Expand All @@ -318,7 +321,7 @@ void* OpenCLPlatform::alloc_unified(DeviceId dev, int64_t size) {
if (!size) return nullptr;

#ifdef CL_VERSION_2_0
if (devices_[dev].version_major == 2) {
if (devices_[dev].use_svm) {
cl_mem_flags flags = CL_MEM_READ_WRITE;
if (devices_[dev].svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)
flags |= CL_MEM_SVM_FINE_GRAIN_BUFFER;
Expand All @@ -336,14 +339,34 @@ void* OpenCLPlatform::alloc_unified(DeviceId dev, int64_t size) {

void OpenCLPlatform::release(DeviceId dev, void* ptr) {
#ifdef CL_VERSION_2_0
if (devices_[dev].version_major == 2)
if (devices_[dev].use_svm)
return clSVMFree(devices_[dev].ctx, ptr);
#endif
unused(dev);
cl_int err = clReleaseMemObject((cl_mem)ptr);
CHECK_OPENCL(err, "clReleaseMemObject()");
}

/// Maps the coarse-grained SVM buffer `ptr` for host read/write access.
///
/// \param dev   index into devices_ of the OpenCL device owning the buffer
/// \param ptr   SVM pointer to map
/// \param size  size of the region to map, passed unchanged to clEnqueueSVMMap
///
/// The map is enqueued as a blocking command (CL_TRUE) on the device's queue
/// and its status is checked with CHECK_OPENCL, like the other OpenCL calls
/// in this file. Nothing happens when the device does not use SVM
/// (use_svm == false). When built against headers without CL_VERSION_2_0 the
/// call is reported through error().
void OpenCLPlatform::map_buffer_svm(DeviceId dev, void* ptr, int64_t size) {
#ifdef CL_VERSION_2_0
    // Devices that do not use SVM have nothing to map.
    if (!devices_[dev].use_svm)
        return;

    cl_int err = clEnqueueSVMMap(devices_[dev].queue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, ptr, size, 0, nullptr, nullptr);
    CHECK_OPENCL(err, "clEnqueueSVMMap()");
#else
    // Parameters are only meaningful with OpenCL 2.0 SVM support.
    (void)ptr;
    (void)size;
    // NOTE(review): other error() calls in this file use a bare '%' placeholder
    // rather than '%d' - confirm which format the logger expects.
    error("Coarse-grained SVM is not supported on OpenCL device %d", dev);
#endif
}

/// Unmaps the coarse-grained SVM buffer `ptr` so the device can access it again.
///
/// \param dev  index into devices_ of the OpenCL device owning the buffer
/// \param ptr  SVM pointer previously mapped for host access
///
/// The unmap command is only enqueued on the device's queue; no clFinish or
/// event wait is issued here. The enqueue status is checked with CHECK_OPENCL,
/// like the other OpenCL calls in this file. Nothing happens when the device
/// does not use SVM (use_svm == false). When built against headers without
/// CL_VERSION_2_0 the call is reported through error().
void OpenCLPlatform::unmap_buffer_svm(DeviceId dev, void* ptr) {
#ifdef CL_VERSION_2_0
    // Devices that do not use SVM have nothing to unmap.
    if (!devices_[dev].use_svm)
        return;

    cl_int err = clEnqueueSVMUnmap(devices_[dev].queue, ptr, 0, nullptr, nullptr);
    CHECK_OPENCL(err, "clEnqueueSVMUnmap()");
#else
    // The pointer is only meaningful with OpenCL 2.0 SVM support.
    (void)ptr;
    // NOTE(review): other error() calls in this file use a bare '%' placeholder
    // rather than '%d' - confirm which format the logger expects.
    error("Coarse-grained SVM is not supported on OpenCL device %d", dev);
#endif
}

void time_kernel_callback(cl_event event, cl_int, void* data) {
auto dev = reinterpret_cast<OpenCLPlatform::DeviceData*>(data);
cl_ulong end, start;
Expand Down Expand Up @@ -382,10 +405,10 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para
cl_mem struct_buf = clCreateBuffer(devices_[dev].ctx, flags, launch_params.args.sizes[i], launch_params.args.data[i], &err);
CHECK_OPENCL(err, "clCreateBuffer()");
kernel_structs.push_back(struct_buf);
clSetKernelArg(kernel, i, sizeof(cl_mem), &struct_buf);
CHECK_OPENCL(clSetKernelArg(kernel, i, sizeof(cl_mem), &struct_buf), "clSetKernelArg");
} else {
#ifdef CL_VERSION_2_0
if (launch_params.args.types[i] == KernelArgType::Ptr && devices_[dev].version_major == 2) {
if (launch_params.args.types[i] == KernelArgType::Ptr && devices_[dev].use_svm) {
cl_int err = clSetKernelArgSVMPointer(kernel, i, *(void**)launch_params.args.data[i]);
CHECK_OPENCL(err, "clSetKernelArgSVMPointer()");
continue;
Expand Down Expand Up @@ -455,11 +478,12 @@ void OpenCLPlatform::copy(DeviceId dev_src, const void* src, int64_t offset_src,
unused(dev_dst);

#ifdef CL_VERSION_2_0
if (devices_[dev_src].version_major == 2 && devices_[dev_dst].version_major == 2)
if (devices_[dev_src].use_svm && devices_[dev_dst].use_svm)
return copy_svm(src, offset_src, dst, offset_dst, size);
if ((devices_[dev_src].version_major == 2 && devices_[dev_dst].version_major == 1) ||
(devices_[dev_src].version_major == 1 && devices_[dev_dst].version_major == 2))
if ((devices_[dev_src].use_svm != devices_[dev_dst].use_svm))
error("copy between SVM and non-SVM OpenCL devices % and %", dev_src, dev_dst);
if (devices_[dev_src].use_svm)
return copy_svm(src, offset_src, dst, offset_dst, size);
#endif

cl_int err = clEnqueueCopyBuffer(devices_[dev_src].queue, (cl_mem)src, (cl_mem)dst, offset_src, offset_dst, size, 0, NULL, NULL);
Expand All @@ -469,8 +493,16 @@ void OpenCLPlatform::copy(DeviceId dev_src, const void* src, int64_t offset_src,

void OpenCLPlatform::copy_from_host(const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) {
#ifdef CL_VERSION_2_0
if (devices_[dev_dst].version_major == 2)
return copy_svm(src, offset_src, dst, offset_dst, size);
if (devices_[dev_dst].use_svm) {
if (!(devices_[dev_dst].svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
map_buffer_svm(dev_dst, const_cast<void *>(dst), size);
copy_svm(src, offset_src, dst, offset_dst, size);
unmap_buffer_svm(dev_dst, const_cast<void *>(dst));
//copy_svm_device(dev_dst, src, offset_src, dst, offset_dst, size);
} else
copy_svm(src, offset_src, dst, offset_dst, size);
return;
}
#endif
cl_int err = clEnqueueWriteBuffer(devices_[dev_dst].queue, (cl_mem)dst, CL_FALSE, offset_dst, size, (char*)src + offset_src, 0, NULL, NULL);
err |= clFinish(devices_[dev_dst].queue);
Expand All @@ -479,14 +511,27 @@ void OpenCLPlatform::copy_from_host(const void* src, int64_t offset_src, DeviceI

void OpenCLPlatform::copy_to_host(DeviceId dev_src, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) {
#ifdef CL_VERSION_2_0
if (devices_[dev_src].version_major == 2)
return copy_svm(src, offset_src, dst, offset_dst, size);
if (devices_[dev_src].use_svm) {
if (!(devices_[dev_src].svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
//map_buffer_svm(dev_src, const_cast<void *>(src), size);
//copy_svm(src, offset_src, dst, offset_dst, size);
//unmap_buffer_svm(dev_src, const_cast<void *>(src));
copy_svm_device(dev_src, src, offset_src, dst, offset_dst, size);
}
else
copy_svm(src, offset_src, dst, offset_dst, size);
return;
}
#endif
cl_int err = clEnqueueReadBuffer(devices_[dev_src].queue, (cl_mem)src, CL_FALSE, offset_src, size, (char*)dst + offset_dst, 0, NULL, NULL);
err |= clFinish(devices_[dev_src].queue);
CHECK_OPENCL(err, "clEnqueueReadBuffer()");
}

/// Copies `size` bytes from `src + offset_src` to `dst + offset_dst` with a
/// blocking clEnqueueSVMMemcpy on the command queue of device `dev`.
///
/// \param dev         index into devices_ of the device whose queue performs the copy
/// \param src         source pointer
/// \param offset_src  byte offset added to `src`
/// \param dst         destination pointer
/// \param offset_dst  byte offset added to `dst`
/// \param size        number of bytes to copy
///
/// clEnqueueSVMMemcpy only exists in OpenCL 2.0 headers, so the call is guarded
/// by CL_VERSION_2_0 like every other SVM call in this file; without it the
/// command is reported as unavailable. The enqueue status is checked with
/// CHECK_OPENCL instead of being discarded.
void OpenCLPlatform::copy_svm_device(DeviceId dev, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) {
#ifdef CL_VERSION_2_0
    const char* from = static_cast<const char*>(src) + offset_src;
    char* to = static_cast<char*>(dst) + offset_dst;
    cl_int err = clEnqueueSVMMemcpy(devices_[dev].queue, CL_TRUE, to, from, size, 0, nullptr, nullptr);
    CHECK_OPENCL(err, "clEnqueueSVMMemcpy()");
#else
    // No SVM API available at compile time: nothing can be copied this way.
    (void)dev;
    (void)src;
    (void)offset_src;
    (void)dst;
    (void)offset_dst;
    (void)size;
    command_unavailable("copy_svm_device");
#endif
}

/// Host-side copy between two pointers: transfers `size` bytes starting at
/// `src + offset_src` to `dst + offset_dst` using std::copy. No OpenCL call
/// is made here.
void OpenCLPlatform::copy_svm(const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) {
    const char* const first = static_cast<const char*>(src) + offset_src;
    const char* const last = first + size;
    char* const out = static_cast<char*>(dst) + offset_dst;
    std::copy(first, last, out);
}
Expand Down Expand Up @@ -552,12 +597,12 @@ cl_program OpenCLPlatform::compile_program(DeviceId dev, cl_program program, con
options += " -cl-std=CL" + std::to_string(devices_[dev].version_major) + "." + std::to_string(devices_[dev].version_minor);

cl_build_status build_status;
cl_int err = clBuildProgram(program, 0, NULL, options.c_str(), NULL, NULL);
cl_int err = clBuildProgram(program, 1, &devices_[dev].dev, options.c_str(), NULL, NULL);
err |= clGetProgramBuildInfo(program, devices_[dev].dev, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, NULL);

if (build_status == CL_BUILD_ERROR || err != CL_SUCCESS) {
// determine the size of the options and log
size_t log_size, options_size;
size_t log_size = 0, options_size = 0;
err |= clGetProgramBuildInfo(program, devices_[dev].dev, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &options_size);
err |= clGetProgramBuildInfo(program, devices_[dev].dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

Expand Down
5 changes: 5 additions & 0 deletions src/opencl_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@ class OpenCLPlatform : public Platform {
void release(DeviceId dev, void* ptr) override;
void release_host(DeviceId, void*) override { command_unavailable("release_host"); }

/// Maps a coarse-grained SVM buffer for host access (arguments: device, buffer pointer, size).
void map_buffer_svm(DeviceId, void*, int64_t) override;
/// Unmaps a coarse-grained SVM buffer for device access (arguments: device, buffer pointer).
void unmap_buffer_svm(DeviceId, void*) override;

void launch_kernel(DeviceId dev, const LaunchParams& launch_params) override;
void synchronize(DeviceId dev) override;

void copy(DeviceId dev_src, const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override;
void copy_from_host(const void* src, int64_t offset_src, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) override;
void copy_to_host(DeviceId dev_src, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size) override;
/// Copies `size` bytes from `src + offset_src` to `dst + offset_dst` with a blocking clEnqueueSVMMemcpy on the queue of device `dev`.
void copy_svm_device(DeviceId dev, const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size);
/// Copies `size` bytes from `src + offset_src` to `dst + offset_dst` on the host with std::copy.
void copy_svm(const void* src, int64_t offset_src, void* dst, int64_t offset_dst, int64_t size);
void dynamic_profile(DeviceId dev, const std::string& filename);

Expand All @@ -58,6 +62,7 @@ class OpenCLPlatform : public Platform {
cl_command_queue queue = nullptr;
cl_context ctx = nullptr;
#ifdef CL_VERSION_2_0
bool use_svm = false;
cl_device_svm_capabilities svm_caps;
#endif
bool is_intel_fpga = false;
Expand Down
4 changes: 4 additions & 0 deletions src/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class Platform {
virtual void release(DeviceId dev, void* ptr) = 0;
/// Releases page-locked host memory for a device on this platform.
virtual void release_host(DeviceId dev, void* ptr) = 0;
/// Maps a coarse-grained SVM buffer for host access.
/// Default implementation for platforms that do not override it: reports the
/// command through command_unavailable("map_buffer_svm").
virtual void map_buffer_svm(DeviceId dev, void* ptr, int64_t size) {
    // Parameters are deliberately unused in the default implementation.
    (void)dev;
    (void)ptr;
    (void)size;
    command_unavailable("map_buffer_svm");
}
/// Unmaps a coarse-grained SVM buffer for device access.
/// Default implementation for platforms that do not override it: reports the
/// command through command_unavailable("unmap_buffer_svm").
virtual void unmap_buffer_svm(DeviceId dev, void* ptr) {
    // Parameters are deliberately unused in the default implementation.
    (void)dev;
    (void)ptr;
    command_unavailable("unmap_buffer_svm");
}

/// Launches a kernel with the given block/grid size and arguments.
virtual void launch_kernel(DeviceId dev, const LaunchParams& launch_params) = 0;
Expand Down
10 changes: 10 additions & 0 deletions src/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ void Runtime::release_host(PlatformId plat, DeviceId dev, void* ptr) {
platforms_[plat]->release_host(dev, ptr);
}

// Maps a coarse-grained SVM buffer for host access on device `dev` of
// platform `plat`: check_device() is called on the pair first, then the
// request is handed to the selected platform.
void Runtime::map_buffer_svm(PlatformId plat, DeviceId dev, void* ptr, int64_t size) {
    check_device(plat, dev);
    auto& target = platforms_[plat];
    target->map_buffer_svm(dev, ptr, size);
}

// Unmaps a coarse-grained SVM buffer for device access on device `dev` of
// platform `plat`: check_device() is called on the pair first, then the
// request is handed to the selected platform.
void Runtime::unmap_buffer_svm(PlatformId plat, DeviceId dev, void* ptr) {
    check_device(plat, dev);
    auto& target = platforms_[plat];
    target->unmap_buffer_svm(dev, ptr);
}

void Runtime::copy(
PlatformId plat_src, DeviceId dev_src, const void* src, int64_t offset_src,
PlatformId plat_dst, DeviceId dev_dst, void* dst, int64_t offset_dst, int64_t size) {
Expand Down
4 changes: 4 additions & 0 deletions src/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ class Runtime {
void release(PlatformId plat, DeviceId dev, void* ptr);
/// Releases previously allocated page-locked memory.
void release_host(PlatformId plat, DeviceId dev, void* ptr);
/// Maps a coarse-grained SVM buffer for host access.
/// Calls check_device(plat, dev) and forwards `dev`, `ptr` and `size` to the platform selected by `plat`.
void map_buffer_svm(PlatformId plat, DeviceId dev, void* ptr, int64_t size);
/// Unmaps a coarse-grained SVM buffer for device access.
/// Calls check_device(plat, dev) and forwards `dev` and `ptr` to the platform selected by `plat`.
void unmap_buffer_svm(PlatformId plat, DeviceId dev, void* ptr);
/// Copies memory between devices.
void copy(
PlatformId plat_src, DeviceId dev_src, const void* src, int64_t offset_src,
Expand Down