Skip to content

Commit

Permalink
Fix fortran ordering bug. Re-name some interface functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
tbenthompson committed Apr 1, 2021
1 parent f9f1532 commit fca4dd8
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 35 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ See below for usage and installation instructions.
Howdy! Usage is really simple:

```
import cutde.fullspace
import cutde
disp = cutde.fullspace.clu_disp(pts, tris, slips, 0.25)
strain = cutde.fullspace.clu_strain(pts, tris, slips, nu)
disp = cutde.disp(pts, tris, slips, 0.25)
strain = cutde.strain(pts, tris, slips, nu)
```

* `pts` is a `np.array` with shape `(N, 3)`
Expand All @@ -39,7 +39,7 @@ IMPORTANT: N should be the same for all these arrays. There is exactly one trian
Use:

```
stress = cutde.fullspace.strain_to_stress(strain, sm, nu)
stress = cutde.strain_to_stress(strain, sm, nu)
```

to convert from stress to strain assuming isotropic linear elasticity. `sm` is the shear modulus and `nu` is the Poisson ratio.
Expand All @@ -49,10 +49,10 @@ to convert from stress to strain assuming isotropic linear elasticity. `sm` is t
If, instead, you want to create a matrix representing the interaction between every observation point and every source triangle, there is a different interface:

```
import cutde.fullspace
import cutde
disp = cutde.fullspace.clu_disp_all_pairs(pts, tris, slips, 0.25)
strain = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, nu)
disp = cutde.disp_all_pairs(pts, tris, slips, 0.25)
strain = cutde.strain_all_pairs(pts, tris, slips, nu)
```

* `pts` is a `np.array` with shape `(N_OBS_PTS, 3)`
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
21.03.31.1
21.04.01
11 changes: 8 additions & 3 deletions cutde/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import os

source_dir = os.path.dirname(os.path.realpath(__file__))
from .fullspace import ( # noqa: F401
disp,
disp_all_pairs,
py_disp,
strain,
strain_all_pairs,
strain_to_stress,
)
16 changes: 12 additions & 4 deletions cutde/fullspace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ typedef struct Real6 {
Real c;
} Real6;

WITHIN_KERNEL void print(Real x) {
printf("%f \n", x);
}

<%def name="binop(dim, name, op, b_scalar = False)">
<%
b_type = 'Real' if b_scalar else 'Real' + str(dim)
Expand Down Expand Up @@ -56,6 +60,14 @@ ${binop(dim, 'sub_scalar','-',b_scalar = True)}
${binop(dim, 'mul_scalar','*',b_scalar = True)}
${binop(dim, 'div_scalar','/',b_scalar = True)}

WITHIN_KERNEL void print${dim}(Real${dim} x) {
<%
format_str = "%f " * dim
var_str = ','.join(['x.' + comp(d) for d in range(dim)])
%>
printf("${format_str} \n", ${var_str});
}

WITHIN_KERNEL Real sum${dim}(Real${dim} x) {
Real out = 0.0;
% for d in range(dim):
Expand Down Expand Up @@ -160,10 +172,6 @@ WITHIN_KERNEL Real6 tensor_transform3(Real3 a, Real3 b, Real3 c, Real6 tensor) {
return out;
}

WITHIN_KERNEL void print_vec(Real3 x) {
printf("%f, %f, %f\n", x.x, x.y, x.z);
}

WITHIN_KERNEL int trimodefinder(Real3 obs, Real3 tri0, Real3 tri1, Real3 tri2) {
// trimodefinder calculates the normalized barycentric coordinates of
// the points with respect to the TD vertices and specifies the appropriate
Expand Down
41 changes: 29 additions & 12 deletions cutde/fullspace.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os

import numpy as np

import cutde.gpu as cluda

from . import source_dir
from .TDdispFS import TDdispFS

source_dir = os.path.dirname(os.path.realpath(__file__))


def py_disp(obs_pt, tri, slip, nu):
return TDdispFS(obs_pt, tri, slip, nu)
Expand All @@ -24,6 +27,13 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type))
module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir)

if obs_pts.flags.f_contiguous:
obs_pts = obs_pts.copy()
if tris.flags.f_contiguous:
tris = tris.copy()
if slips.flags.f_contiguous:
slips = slips.copy()

gpu_results = cluda.empty_gpu(n * out_dim, float_type)
gpu_obs_pts = cluda.to_gpu(obs_pts, float_type)
gpu_tris = cluda.to_gpu(tris, float_type)
Expand All @@ -43,32 +53,39 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
return out


def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_type):
def call_clu_all_pairs(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
assert obs_pts.shape[1] == 3
assert src_tris.shape[1] == 3
assert src_tris.shape[2] == 3
assert slips.shape[0] == src_tris.shape[0]
assert tris.shape[1] == 3
assert tris.shape[2] == 3
assert slips.shape[0] == tris.shape[0]
assert slips.shape[1] == 3

n_obs = obs_pts.shape[0]
n_src = src_tris.shape[0]
n_src = tris.shape[0]
block_size = 8
n_obs_blocks = int(np.ceil(n_obs / block_size))
n_src_blocks = int(np.ceil(n_src / block_size))
gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type))
module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir)

if obs_pts.flags.f_contiguous:
obs_pts = obs_pts.copy()
if tris.flags.f_contiguous:
tris = tris.copy()
if slips.flags.f_contiguous:
slips = slips.copy()

gpu_results = cluda.empty_gpu(n_obs * n_src * out_dim, float_type)
gpu_obs_pts = cluda.to_gpu(obs_pts, float_type)
gpu_src_tris = cluda.to_gpu(src_tris, float_type)
gpu_tris = cluda.to_gpu(tris, float_type)
gpu_slips = cluda.to_gpu(slips, float_type)

getattr(module, fnc_name + "_all_pairs")(
gpu_results,
np.int32(n_obs),
np.int32(n_src),
gpu_obs_pts,
gpu_src_tris,
gpu_tris,
gpu_slips,
float_type(nu),
grid=(n_obs_blocks, n_src_blocks, 1),
Expand All @@ -78,21 +95,21 @@ def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_ty
return out


def clu_disp(obs_pts, tris, slips, nu, float_dtype=np.float32):
def disp(obs_pts, tris, slips, nu, float_dtype=np.float32):
return call_clu(obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype)


def clu_strain(obs_pts, tris, slips, nu, float_dtype=np.float32):
def strain(obs_pts, tris, slips, nu, float_dtype=np.float32):
return call_clu(obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype)


def clu_disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
def disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
return call_clu_all_pairs(
obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype
)


def clu_strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
def strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
return call_clu_all_pairs(
obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype
)
Expand Down
29 changes: 21 additions & 8 deletions tests/test_tde.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import scipy.io

import cutde.fullspace
import cutde


def get_pt_grid():
Expand Down Expand Up @@ -40,7 +40,7 @@ def py_tde_tester(setup_fnc, N_test=-1):
start = time.time()
for i in range(N_test):
pt = test_pts[i, :]
results[i, :] = cutde.fullspace.py_disp(pt, tri, slip, 0.25)
results[i, :] = cutde.py_disp(pt, tri, slip, 0.25)
np.testing.assert_almost_equal(results[i, 0], correct["UEf"][i, 0])
np.testing.assert_almost_equal(results[i, 1], correct["UNf"][i, 0])
np.testing.assert_almost_equal(results[i, 2], correct["UVf"][i, 0])
Expand Down Expand Up @@ -70,16 +70,22 @@ def cluda_tde_tester(setup_fnc):
tris = np.array([tri] * N_test)
slips = np.array([slip] * N_test)

disp = cutde.fullspace.clu_disp(test_pts[:N_test], tris, slips, 0.25, np.float64)
strain = cutde.fullspace.clu_strain(test_pts[:N_test], tris, slips, nu, np.float64)
stress = cutde.fullspace.strain_to_stress(strain, sm, nu)
disp = cutde.disp(test_pts[:N_test], tris, slips, 0.25, np.float64)
strain = cutde.strain(test_pts[:N_test], tris, slips, nu, np.float64)
stress = cutde.strain_to_stress(strain, sm, nu)

np.testing.assert_almost_equal(disp[:, 0], correct["UEf"][:N_test, 0])
np.testing.assert_almost_equal(disp[:, 1], correct["UNf"][:N_test, 0])
np.testing.assert_almost_equal(disp[:, 2], correct["UVf"][:N_test, 0])
np.testing.assert_almost_equal(strain, correct["Strain"][:N_test])
np.testing.assert_almost_equal(stress, correct["Stress"][:N_test])

test_ptsF = np.asfortranarray(test_pts[:N_test])
trisF = np.asfortranarray(tris)
slipsF = np.asfortranarray(slips)
dispF = cutde.disp(test_ptsF, trisF, slipsF, 0.25, np.float64)
np.testing.assert_almost_equal(disp, dispF)


def test_cluda_simple():
cluda_tde_tester(get_simple_test)
Expand All @@ -96,6 +102,13 @@ def test_all_pairs():
strain1 = np.empty((n_obs, n_src, 6))
for i in range(n_obs):
tiled_pt = np.tile(pts[i, np.newaxis, :], (tris.shape[0], 1))
strain1[i] = cutde.fullspace.clu_strain(tiled_pt, tris, slips, 0.25)
strain2 = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, 0.25)
np.testing.assert_almost_equal(strain1, strain2)
strain1[i] = cutde.strain(tiled_pt, tris, slips, 0.25)
strain2 = cutde.strain_all_pairs(pts, tris, slips, 0.25)
strain3 = cutde.strain_all_pairs(
np.asfortranarray(pts),
np.asfortranarray(tris),
np.asfortranarray(slips),
0.25,
)
np.testing.assert_almost_equal(strain1, strain2)
np.testing.assert_almost_equal(strain2, strain3)

0 comments on commit fca4dd8

Please sign in to comment.