Skip to content

Commit

Permalink
Update readme, simplify C++
Browse files Browse the repository at this point in the history
  • Loading branch information
tbenthompson committed Aug 11, 2021
1 parent 2f7587d commit 547955c
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 69 deletions.
4 changes: 1 addition & 3 deletions cutde/coordinators.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,7 @@ def call_clu_free(obs_pts, tris, slips, nu, fnc):
gpu_results = backend.zeros(n_obs * vec_dim, float_type)

n_obs_blocks = int(np.ceil(n_obs / block_size))
gpu_config = dict(
free_block_size=block_size, float_type=backend.np_to_c_type(float_type)
)
gpu_config = dict(float_type=backend.np_to_c_type(float_type))
module = backend.load_module("free.cu", tmpl_args=gpu_config, tmpl_dir=source_dir)

# Split up the sources into chunks so that we don't completely overwhelm a
Expand Down
32 changes: 7 additions & 25 deletions cutde/cpp_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,32 +19,21 @@ struct XYZ {
SIZE_T z;
};

thread_local XYZ threadIdx;
thread_local XYZ blockIdx;
XYZ blockDim;
XYZ gridDim;

// Returns the component of threadIdx selected by dim:
// 0 -> x, 1 -> y, 2 -> z. Any other dim yields 0.
WITHIN_KERNEL SIZE_T get_local_id(unsigned int dim)
{
    switch (dim) {
        case 0:
            return threadIdx.x;
        case 1:
            return threadIdx.y;
        case 2:
            return threadIdx.z;
        default:
            return 0;
    }
}
// Local id query: the result is always 0, whatever dim is passed.
WITHIN_KERNEL SIZE_T get_local_id(unsigned int dim)
{
    (void)dim;  // the argument does not influence the result
    return 0;
}

// Returns the component of blockIdx selected by dim:
// 0 -> x, 1 -> y, 2 -> z. Any other dim yields 0.
WITHIN_KERNEL SIZE_T get_group_id(unsigned int dim)
{
    switch (dim) {
        case 0:
            return blockIdx.x;
        case 1:
            return blockIdx.y;
        case 2:
            return blockIdx.z;
        default:
            return 0;
    }
}
// Returns the component of blockDim selected by dim:
// 0 -> x, 1 -> y, 2 -> z. Any other dim yields 1.
WITHIN_KERNEL SIZE_T get_local_size(unsigned int dim)
{
    switch (dim) {
        case 0:
            return blockDim.x;
        case 1:
            return blockDim.y;
        case 2:
            return blockDim.z;
        default:
            return 1;
    }
}

// Local size query: the result is always 1, whatever dim is passed.
WITHIN_KERNEL SIZE_T get_local_size(unsigned int dim)
{
    (void)dim;  // the argument does not influence the result
    return 1;
}

WITHIN_KERNEL SIZE_T get_num_groups(unsigned int dim)
{
if(dim == 0) return gridDim.x;
Expand All @@ -54,11 +43,11 @@ WITHIN_KERNEL SIZE_T get_num_groups(unsigned int dim)
}
// Global size query for dimension dim.
// NOTE(review): this rendered diff shows two return lines back to back. As
// written, only the first one executes and the second is unreachable.
// Presumably one is the line removed by the commit and the other the line
// added by it -- confirm against the actual file.
WITHIN_KERNEL SIZE_T get_global_size(unsigned int dim)
{
// Number of groups along dim multiplied by the per-group size along dim.
return get_num_groups(dim) * get_local_size(dim);
// Unreachable as written: the number of groups along dim only.
return get_num_groups(dim);
}
// Global id query for dimension dim.
// NOTE(review): this rendered diff shows two return lines back to back. As
// written, only the first one executes and the second is unreachable.
// Presumably one is the line removed by the commit and the other the line
// added by it -- confirm against the actual file.
WITHIN_KERNEL SIZE_T get_global_id(unsigned int dim)
{
// Local id plus the group id scaled by the per-group size along dim.
return get_local_id(dim) + get_group_id(dim) * get_local_size(dim);
// Unreachable as written: the group id along dim only.
return get_group_id(dim);
}

#include <pybind11/pybind11.h>
Expand Down Expand Up @@ -101,17 +90,10 @@ decltype(auto) wrapper(R(*fn)(Args...))
std::tuple<SIZE_T,SIZE_T,SIZE_T> block)
{
gridDim = {std::get<0>(grid), std::get<1>(grid), std::get<2>(grid)};
blockDim = {std::get<0>(block), std::get<1>(block), std::get<2>(block)};
blockIdx = {0,0,0};
threadIdx = {0,0,0};

SIZE_T Ngrid = gridDim.x * gridDim.y * gridDim.z;

// block must be (1,1,1)
assert(std::get<0>(block) == 0);
assert(std::get<1>(block) == 0);
assert(std::get<2>(block) == 0);

auto ptr_args = std::make_tuple(conv_arg(args)...);

#pragma omp parallel for
Expand Down
9 changes: 5 additions & 4 deletions cutde/free.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ void free_${name}(GLOBAL_MEM Real* results,
{
int i = get_global_id(0);
int group_id = get_local_id(0);
int block_size = get_local_size(0);

%for d_obs in range(vec_dim):
Real sum${d_obs} = 0.0;
Expand All @@ -27,14 +28,14 @@ void free_${name}(GLOBAL_MEM Real* results,
}

% for d1 in range(3):
LOCAL_MEM Real3 sh_tri${d1}[${free_block_size}];
LOCAL_MEM Real3 sh_tri${d1}[256];
% endfor
LOCAL_MEM Real3 sh_slips[${free_block_size}];
LOCAL_MEM Real3 sh_slips[256];

// NOTE: The blocking scheme set up here seems to be irrelevant because the
// runtime is totally dominated by the floating point operations inside the
// TDE evaluation.
for (int block_start = src_start; block_start < src_end; block_start += ${free_block_size}) {
for (int block_start = src_start; block_start < src_end; block_start += block_size) {
int j = block_start + group_id;
if (j < src_end) {
% for d1 in range(3):
Expand All @@ -48,7 +49,7 @@ void free_${name}(GLOBAL_MEM Real* results,
${common.LOCAL_BARRIER()}

if (i < n_obs) {
int block_end = min(src_end, block_start + ${free_block_size});
int block_end = min(src_end, block_start + block_size);
int block_length = block_end - block_start;
for (int block_idx = 0; block_idx < block_length; block_idx++) {
% for d1 in range(3):
Expand Down
Loading

0 comments on commit 547955c

Please sign in to comment.