Skip to content

Commit 8c2de99

Browse files
transpose matmul categorical bit reproducibility
1 parent 5340f2d commit 8c2de99

File tree

2 files changed

+40
-10
lines changed

2 files changed

+40
-10
lines changed

src/tabmat/ext/cat_split_helpers-tmpl.cpp

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#include <vector>
2-
2+
#include <omp.h>
33

44
<%def name="transpose_matvec(dropfirst)">
55
template <typename Int, typename F>
@@ -10,24 +10,30 @@ void _transpose_matvec_${dropfirst}(
1010
F* res,
1111
Int res_size
1212
) {
13-
#pragma omp parallel
13+
int num_threads = omp_get_max_threads();
14+
std::vector<F> all_res(num_threads * res_size, 0.0);
15+
#pragma omp parallel shared(all_res)
1416
{
15-
std::vector<F> restemp(res_size, 0.0);
16-
#pragma omp for
17+
int tid = omp_get_thread_num();
18+
F* res_slice = &all_res[tid * res_size];
19+
#pragma omp for
1720
for (Py_ssize_t i = 0; i < n_rows; i++) {
1821
% if dropfirst == 'all_rows_drop_first':
1922
Py_ssize_t col_idx = indices[i] - 1;
2023
if (col_idx != -1) {
21-
restemp[col_idx] += other[i];
24+
res_slice[col_idx] += other[i];
2225
}
2326
% else:
24-
restemp[indices[i]] += other[i];
27+
res_slice[indices[i]] += other[i];
2528
% endif
2629
}
27-
for (Py_ssize_t i = 0; i < res_size; i++) {
28-
# pragma omp atomic
29-
res[i] += restemp[i];
30-
}
30+
#pragma omp for
31+
for (Py_ssize_t i = 0; i < res_size; ++i) {
32+
for (int tid = 0; tid < num_threads; ++tid) {
33+
#pragma omp atomic
34+
res[i] += all_res[tid * res_size + i];
35+
}
36+
}
3137
}
3238
}
3339
</%def>

tests/test_reproducibility.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
import tabmat as tm
6+
7+
N = 100
8+
K = 5
9+
10+
11+
@pytest.fixture
12+
def df():
13+
rng = np.random.default_rng(1234)
14+
return pd.DataFrame(
15+
pd.Categorical(rng.integers(low=0, high=K - 1, size=N), categories=range(K))
16+
)
17+
18+
19+
@pytest.mark.parametrize("cat_threshold", [K, K + 1])
20+
def test_mat_transpose_vec(df, cat_threshold):
21+
rng = np.random.default_rng(1234)
22+
vec = rng.normal(size=N)
23+
mat = tm.from_pandas(df, cat_threshold=cat_threshold)
24+
np.testing.assert_equal(mat.transpose_matvec(vec), mat.transpose_matvec(vec))

0 commit comments

Comments
 (0)