diff --git a/README.md b/README.md index 73cb078..b16a05f 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,10 @@ See below for usage and installation instructions. Howdy! Usage is really simple: ``` -import cutde.fullspace +import cutde -disp = cutde.fullspace.clu_disp(pts, tris, slips, 0.25) -strain = cutde.fullspace.clu_strain(pts, tris, slips, nu) +disp = cutde.disp(pts, tris, slips, 0.25) +strain = cutde.strain(pts, tris, slips, nu) ``` * `pts` is a `np.array` with shape `(N, 3)` @@ -39,7 +39,7 @@ IMPORTANT: N should be the same for all these arrays. There is exactly one trian Use: ``` -stress = cutde.fullspace.strain_to_stress(strain, sm, nu) +stress = cutde.strain_to_stress(strain, sm, nu) ``` to convert from stress to strain assuming isotropic linear elasticity. `sm` is the shear modulus and `nu` is the Poisson ratio. @@ -49,10 +49,10 @@ to convert from stress to strain assuming isotropic linear elasticity. `sm` is t If, instead, you want to create a matrix representing the interaction between every observation point and every source triangle, there is a different interface: ``` -import cutde.fullspace +import cutde -disp = cutde.fullspace.clu_disp_all_pairs(pts, tris, slips, 0.25) -strain = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, nu) +disp = cutde.disp_all_pairs(pts, tris, slips, 0.25) +strain = cutde.strain_all_pairs(pts, tris, slips, nu) ``` * `pts` is a `np.array` with shape `(N_OBS_PTS, 3)` diff --git a/VERSION b/VERSION index 9a9a678..225093b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -21.03.31.1 \ No newline at end of file +21.04.01 \ No newline at end of file diff --git a/cutde/__init__.py b/cutde/__init__.py index 9db6ea3..6e372c7 100644 --- a/cutde/__init__.py +++ b/cutde/__init__.py @@ -1,3 +1,8 @@ -import os - -source_dir = os.path.dirname(os.path.realpath(__file__)) +from .fullspace import ( # noqa: F401 + disp, + disp_all_pairs, + py_disp, + strain, + strain_all_pairs, + strain_to_stress, +) diff --git a/cutde/fullspace.cu b/cutde/fullspace.cu index d893979..3ab56ac 100644 --- a/cutde/fullspace.cu +++ b/cutde/fullspace.cu @@ -26,6 +26,10 @@ typedef struct Real6 { Real c; } Real6; +WITHIN_KERNEL void print(Real x) { + printf("%f \n", x); +} + <%def name="binop(dim, name, op, b_scalar = False)"> <% b_type = 'Real' if b_scalar else 'Real' + str(dim) @@ -56,6 +60,14 @@ ${binop(dim, 'sub_scalar','-',b_scalar = True)} ${binop(dim, 'mul_scalar','*',b_scalar = True)} ${binop(dim, 'div_scalar','/',b_scalar = True)} +WITHIN_KERNEL void print${dim}(Real${dim} x) { + <% + format_str = "%f " * dim + var_str = ','.join(['x.' + comp(d) for d in range(dim)]) + %> + printf("${format_str} \n", ${var_str}); +} + WITHIN_KERNEL Real sum${dim}(Real${dim} x) { Real out = 0.0; % for d in range(dim): @@ -160,10 +172,6 @@ WITHIN_KERNEL Real6 tensor_transform3(Real3 a, Real3 b, Real3 c, Real6 tensor) { return out; } -WITHIN_KERNEL void print_vec(Real3 x) { - printf("%f, %f, %f\n", x.x, x.y, x.z); -} - WITHIN_KERNEL int trimodefinder(Real3 obs, Real3 tri0, Real3 tri1, Real3 tri2) { // trimodefinder calculates the normalized barycentric coordinates of // the points with respect to the TD vertices and specifies the appropriate diff --git a/cutde/fullspace.py b/cutde/fullspace.py index e4b3254..2036fb9 100644 --- a/cutde/fullspace.py +++ b/cutde/fullspace.py @@ -1,10 +1,13 @@ +import os + import numpy as np import cutde.gpu as cluda -from . import source_dir from .TDdispFS import TDdispFS +source_dir = os.path.dirname(os.path.realpath(__file__)) + def py_disp(obs_pt, tri, slip, nu): return TDdispFS(obs_pt, tri, slip, nu) @@ -24,6 +27,13 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type): gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type)) module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir) + if obs_pts.flags.f_contiguous: + obs_pts = obs_pts.copy() + if tris.flags.f_contiguous: + tris = tris.copy() + if slips.flags.f_contiguous: + slips = slips.copy() + gpu_results = cluda.empty_gpu(n * out_dim, float_type) gpu_obs_pts = cluda.to_gpu(obs_pts, float_type) gpu_tris = cluda.to_gpu(tris, float_type) @@ -43,24 +53,31 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type): return out -def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_type): +def call_clu_all_pairs(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type): assert obs_pts.shape[1] == 3 - assert src_tris.shape[1] == 3 - assert src_tris.shape[2] == 3 - assert slips.shape[0] == src_tris.shape[0] + assert tris.shape[1] == 3 + assert tris.shape[2] == 3 + assert slips.shape[0] == tris.shape[0] assert slips.shape[1] == 3 n_obs = obs_pts.shape[0] - n_src = src_tris.shape[0] + n_src = tris.shape[0] block_size = 8 n_obs_blocks = int(np.ceil(n_obs / block_size)) n_src_blocks = int(np.ceil(n_src / block_size)) gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type)) module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir) + if obs_pts.flags.f_contiguous: + obs_pts = obs_pts.copy() + if tris.flags.f_contiguous: + tris = tris.copy() + if slips.flags.f_contiguous: + slips = slips.copy() + gpu_results = cluda.empty_gpu(n_obs * n_src * out_dim, float_type) gpu_obs_pts = cluda.to_gpu(obs_pts, float_type) - gpu_src_tris = cluda.to_gpu(src_tris, float_type) + gpu_tris = cluda.to_gpu(tris, float_type) gpu_slips = cluda.to_gpu(slips, float_type) getattr(module, fnc_name + "_all_pairs")( @@ -68,7 +85,7 @@ def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_ty np.int32(n_obs), np.int32(n_src), gpu_obs_pts, - gpu_src_tris, + gpu_tris, gpu_slips, float_type(nu), grid=(n_obs_blocks, n_src_blocks, 1), @@ -78,21 +95,21 @@ def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_ty return out -def clu_disp(obs_pts, tris, slips, nu, float_dtype=np.float32): +def disp(obs_pts, tris, slips, nu, float_dtype=np.float32): return call_clu(obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype) -def clu_strain(obs_pts, tris, slips, nu, float_dtype=np.float32): +def strain(obs_pts, tris, slips, nu, float_dtype=np.float32): return call_clu(obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype) -def clu_disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32): +def disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32): return call_clu_all_pairs( obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype ) -def clu_strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32): +def strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32): return call_clu_all_pairs( obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype ) diff --git a/tests/test_tde.py b/tests/test_tde.py index 265a7f7..79fef42 100644 --- a/tests/test_tde.py +++ b/tests/test_tde.py @@ -3,7 +3,7 @@ import numpy as np import scipy.io -import cutde.fullspace +import cutde def get_pt_grid(): @@ -40,7 +40,7 @@ def py_tde_tester(setup_fnc, N_test=-1): start = time.time() for i in range(N_test): pt = test_pts[i, :] - results[i, :] = cutde.fullspace.py_disp(pt, tri, slip, 0.25) + results[i, :] = cutde.py_disp(pt, tri, slip, 0.25) np.testing.assert_almost_equal(results[i, 0], correct["UEf"][i, 0]) np.testing.assert_almost_equal(results[i, 1], correct["UNf"][i, 0]) np.testing.assert_almost_equal(results[i, 2], correct["UVf"][i, 0]) @@ -70,9 +70,9 @@ def cluda_tde_tester(setup_fnc): tris = np.array([tri] * N_test) slips = np.array([slip] * N_test) - disp = cutde.fullspace.clu_disp(test_pts[:N_test], tris, slips, 0.25, np.float64) - strain = cutde.fullspace.clu_strain(test_pts[:N_test], tris, slips, nu, np.float64) - stress = cutde.fullspace.strain_to_stress(strain, sm, nu) + disp = cutde.disp(test_pts[:N_test], tris, slips, 0.25, np.float64) + strain = cutde.strain(test_pts[:N_test], tris, slips, nu, np.float64) + stress = cutde.strain_to_stress(strain, sm, nu) np.testing.assert_almost_equal(disp[:, 0], correct["UEf"][:N_test, 0]) np.testing.assert_almost_equal(disp[:, 1], correct["UNf"][:N_test, 0]) @@ -80,6 +80,12 @@ def cluda_tde_tester(setup_fnc): np.testing.assert_almost_equal(strain, correct["Strain"][:N_test]) np.testing.assert_almost_equal(stress, correct["Stress"][:N_test]) + test_ptsF = np.asfortranarray(test_pts[:N_test]) + trisF = np.asfortranarray(tris) + slipsF = np.asfortranarray(slips) + dispF = cutde.disp(test_ptsF, trisF, slipsF, 0.25, np.float64) + np.testing.assert_almost_equal(disp, dispF) + def test_cluda_simple(): cluda_tde_tester(get_simple_test) @@ -96,6 +102,13 @@ def test_all_pairs(): strain1 = np.empty((n_obs, n_src, 6)) for i in range(n_obs): tiled_pt = np.tile(pts[i, np.newaxis, :], (tris.shape[0], 1)) - strain1[i] = cutde.fullspace.clu_strain(tiled_pt, tris, slips, 0.25) - strain2 = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, 0.25) - np.testing.assert_almost_equal(strain1, strain2) + strain1[i] = cutde.strain(tiled_pt, tris, slips, 0.25) + strain2 = cutde.strain_all_pairs(pts, tris, slips, 0.25) + strain3 = cutde.strain_all_pairs( + np.asfortranarray(pts), + np.asfortranarray(tris), + np.asfortranarray(slips), + 0.25, + ) + np.testing.assert_almost_equal(strain1, strain2) + np.testing.assert_almost_equal(strain2, strain3)