Skip to content

Commit fca4dd8

Browse files
committed
Fix Fortran ordering bug. Rename some interface functions.
1 parent f9f1532 commit fca4dd8

File tree

6 files changed

+78
-35
lines changed

6 files changed

+78
-35
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ See below for usage and installation instructions.
1818
Howdy! Usage is really simple:
1919

2020
```
21-
import cutde.fullspace
21+
import cutde
2222
23-
disp = cutde.fullspace.clu_disp(pts, tris, slips, 0.25)
24-
strain = cutde.fullspace.clu_strain(pts, tris, slips, nu)
23+
disp = cutde.disp(pts, tris, slips, 0.25)
24+
strain = cutde.strain(pts, tris, slips, nu)
2525
```
2626

2727
* `pts` is a `np.array` with shape `(N, 3)`
@@ -39,7 +39,7 @@ IMPORTANT: N should be the same for all these arrays. There is exactly one trian
3939
Use:
4040

4141
```
42-
stress = cutde.fullspace.strain_to_stress(strain, sm, nu)
42+
stress = cutde.strain_to_stress(strain, sm, nu)
4343
```
4444

4545
to convert from strain to stress assuming isotropic linear elasticity. `sm` is the shear modulus and `nu` is the Poisson ratio.
@@ -49,10 +49,10 @@ to convert from stress to strain assuming isotropic linear elasticity. `sm` is t
4949
If, instead, you want to create a matrix representing the interaction between every observation point and every source triangle, there is a different interface:
5050

5151
```
52-
import cutde.fullspace
52+
import cutde
5353
54-
disp = cutde.fullspace.clu_disp_all_pairs(pts, tris, slips, 0.25)
55-
strain = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, nu)
54+
disp = cutde.disp_all_pairs(pts, tris, slips, 0.25)
55+
strain = cutde.strain_all_pairs(pts, tris, slips, nu)
5656
```
5757

5858
* `pts` is a `np.array` with shape `(N_OBS_PTS, 3)`

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
21.03.31.1
1+
21.04.01

cutde/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1-
import os
2-
3-
source_dir = os.path.dirname(os.path.realpath(__file__))
1+
from .fullspace import ( # noqa: F401
2+
disp,
3+
disp_all_pairs,
4+
py_disp,
5+
strain,
6+
strain_all_pairs,
7+
strain_to_stress,
8+
)

cutde/fullspace.cu

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ typedef struct Real6 {
2626
Real c;
2727
} Real6;
2828

29+
WITHIN_KERNEL void print(Real x) {
30+
printf("%f \n", x);
31+
}
32+
2933
<%def name="binop(dim, name, op, b_scalar = False)">
3034
<%
3135
b_type = 'Real' if b_scalar else 'Real' + str(dim)
@@ -56,6 +60,14 @@ ${binop(dim, 'sub_scalar','-',b_scalar = True)}
5660
${binop(dim, 'mul_scalar','*',b_scalar = True)}
5761
${binop(dim, 'div_scalar','/',b_scalar = True)}
5862

63+
WITHIN_KERNEL void print${dim}(Real${dim} x) {
64+
<%
65+
format_str = "%f " * dim
66+
var_str = ','.join(['x.' + comp(d) for d in range(dim)])
67+
%>
68+
printf("${format_str} \n", ${var_str});
69+
}
70+
5971
WITHIN_KERNEL Real sum${dim}(Real${dim} x) {
6072
Real out = 0.0;
6173
% for d in range(dim):
@@ -160,10 +172,6 @@ WITHIN_KERNEL Real6 tensor_transform3(Real3 a, Real3 b, Real3 c, Real6 tensor) {
160172
return out;
161173
}
162174

163-
WITHIN_KERNEL void print_vec(Real3 x) {
164-
printf("%f, %f, %f\n", x.x, x.y, x.z);
165-
}
166-
167175
WITHIN_KERNEL int trimodefinder(Real3 obs, Real3 tri0, Real3 tri1, Real3 tri2) {
168176
// trimodefinder calculates the normalized barycentric coordinates of
169177
// the points with respect to the TD vertices and specifies the appropriate

cutde/fullspace.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import os
2+
13
import numpy as np
24

35
import cutde.gpu as cluda
46

5-
from . import source_dir
67
from .TDdispFS import TDdispFS
78

9+
source_dir = os.path.dirname(os.path.realpath(__file__))
10+
811

912
def py_disp(obs_pt, tri, slip, nu):
1013
return TDdispFS(obs_pt, tri, slip, nu)
@@ -24,6 +27,13 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
2427
gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type))
2528
module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir)
2629

30+
if obs_pts.flags.f_contiguous:
31+
obs_pts = obs_pts.copy()
32+
if tris.flags.f_contiguous:
33+
tris = tris.copy()
34+
if slips.flags.f_contiguous:
35+
slips = slips.copy()
36+
2737
gpu_results = cluda.empty_gpu(n * out_dim, float_type)
2838
gpu_obs_pts = cluda.to_gpu(obs_pts, float_type)
2939
gpu_tris = cluda.to_gpu(tris, float_type)
@@ -43,32 +53,39 @@ def call_clu(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
4353
return out
4454

4555

46-
def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_type):
56+
def call_clu_all_pairs(obs_pts, tris, slips, nu, fnc_name, out_dim, float_type):
4757
assert obs_pts.shape[1] == 3
48-
assert src_tris.shape[1] == 3
49-
assert src_tris.shape[2] == 3
50-
assert slips.shape[0] == src_tris.shape[0]
58+
assert tris.shape[1] == 3
59+
assert tris.shape[2] == 3
60+
assert slips.shape[0] == tris.shape[0]
5161
assert slips.shape[1] == 3
5262

5363
n_obs = obs_pts.shape[0]
54-
n_src = src_tris.shape[0]
64+
n_src = tris.shape[0]
5565
block_size = 8
5666
n_obs_blocks = int(np.ceil(n_obs / block_size))
5767
n_src_blocks = int(np.ceil(n_src / block_size))
5868
gpu_config = dict(block_size=block_size, float_type=cluda.np_to_c_type(float_type))
5969
module = cluda.load_gpu("fullspace.cu", tmpl_args=gpu_config, tmpl_dir=source_dir)
6070

71+
if obs_pts.flags.f_contiguous:
72+
obs_pts = obs_pts.copy()
73+
if tris.flags.f_contiguous:
74+
tris = tris.copy()
75+
if slips.flags.f_contiguous:
76+
slips = slips.copy()
77+
6178
gpu_results = cluda.empty_gpu(n_obs * n_src * out_dim, float_type)
6279
gpu_obs_pts = cluda.to_gpu(obs_pts, float_type)
63-
gpu_src_tris = cluda.to_gpu(src_tris, float_type)
80+
gpu_tris = cluda.to_gpu(tris, float_type)
6481
gpu_slips = cluda.to_gpu(slips, float_type)
6582

6683
getattr(module, fnc_name + "_all_pairs")(
6784
gpu_results,
6885
np.int32(n_obs),
6986
np.int32(n_src),
7087
gpu_obs_pts,
71-
gpu_src_tris,
88+
gpu_tris,
7289
gpu_slips,
7390
float_type(nu),
7491
grid=(n_obs_blocks, n_src_blocks, 1),
@@ -78,21 +95,21 @@ def call_clu_all_pairs(obs_pts, src_tris, slips, nu, fnc_name, out_dim, float_ty
7895
return out
7996

8097

81-
def clu_disp(obs_pts, tris, slips, nu, float_dtype=np.float32):
98+
def disp(obs_pts, tris, slips, nu, float_dtype=np.float32):
8299
return call_clu(obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype)
83100

84101

85-
def clu_strain(obs_pts, tris, slips, nu, float_dtype=np.float32):
102+
def strain(obs_pts, tris, slips, nu, float_dtype=np.float32):
86103
return call_clu(obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype)
87104

88105

89-
def clu_disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
106+
def disp_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
90107
return call_clu_all_pairs(
91108
obs_pts, tris, slips, nu, "disp_fullspace", 3, float_dtype
92109
)
93110

94111

95-
def clu_strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
112+
def strain_all_pairs(obs_pts, tris, slips, nu, float_dtype=np.float32):
96113
return call_clu_all_pairs(
97114
obs_pts, tris, slips, nu, "strain_fullspace", 6, float_dtype
98115
)

tests/test_tde.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import scipy.io
55

6-
import cutde.fullspace
6+
import cutde
77

88

99
def get_pt_grid():
@@ -40,7 +40,7 @@ def py_tde_tester(setup_fnc, N_test=-1):
4040
start = time.time()
4141
for i in range(N_test):
4242
pt = test_pts[i, :]
43-
results[i, :] = cutde.fullspace.py_disp(pt, tri, slip, 0.25)
43+
results[i, :] = cutde.py_disp(pt, tri, slip, 0.25)
4444
np.testing.assert_almost_equal(results[i, 0], correct["UEf"][i, 0])
4545
np.testing.assert_almost_equal(results[i, 1], correct["UNf"][i, 0])
4646
np.testing.assert_almost_equal(results[i, 2], correct["UVf"][i, 0])
@@ -70,16 +70,22 @@ def cluda_tde_tester(setup_fnc):
7070
tris = np.array([tri] * N_test)
7171
slips = np.array([slip] * N_test)
7272

73-
disp = cutde.fullspace.clu_disp(test_pts[:N_test], tris, slips, 0.25, np.float64)
74-
strain = cutde.fullspace.clu_strain(test_pts[:N_test], tris, slips, nu, np.float64)
75-
stress = cutde.fullspace.strain_to_stress(strain, sm, nu)
73+
disp = cutde.disp(test_pts[:N_test], tris, slips, 0.25, np.float64)
74+
strain = cutde.strain(test_pts[:N_test], tris, slips, nu, np.float64)
75+
stress = cutde.strain_to_stress(strain, sm, nu)
7676

7777
np.testing.assert_almost_equal(disp[:, 0], correct["UEf"][:N_test, 0])
7878
np.testing.assert_almost_equal(disp[:, 1], correct["UNf"][:N_test, 0])
7979
np.testing.assert_almost_equal(disp[:, 2], correct["UVf"][:N_test, 0])
8080
np.testing.assert_almost_equal(strain, correct["Strain"][:N_test])
8181
np.testing.assert_almost_equal(stress, correct["Stress"][:N_test])
8282

83+
test_ptsF = np.asfortranarray(test_pts[:N_test])
84+
trisF = np.asfortranarray(tris)
85+
slipsF = np.asfortranarray(slips)
86+
dispF = cutde.disp(test_ptsF, trisF, slipsF, 0.25, np.float64)
87+
np.testing.assert_almost_equal(disp, dispF)
88+
8389

8490
def test_cluda_simple():
8591
cluda_tde_tester(get_simple_test)
@@ -96,6 +102,13 @@ def test_all_pairs():
96102
strain1 = np.empty((n_obs, n_src, 6))
97103
for i in range(n_obs):
98104
tiled_pt = np.tile(pts[i, np.newaxis, :], (tris.shape[0], 1))
99-
strain1[i] = cutde.fullspace.clu_strain(tiled_pt, tris, slips, 0.25)
100-
strain2 = cutde.fullspace.clu_strain_all_pairs(pts, tris, slips, 0.25)
101-
np.testing.assert_almost_equal(strain1, strain2)
105+
strain1[i] = cutde.strain(tiled_pt, tris, slips, 0.25)
106+
strain2 = cutde.strain_all_pairs(pts, tris, slips, 0.25)
107+
strain3 = cutde.strain_all_pairs(
108+
np.asfortranarray(pts),
109+
np.asfortranarray(tris),
110+
np.asfortranarray(slips),
111+
0.25,
112+
)
113+
np.testing.assert_almost_equal(strain1, strain2)
114+
np.testing.assert_almost_equal(strain2, strain3)

0 commit comments

Comments
 (0)