Open
Description
In [1]: import numpy as np
...: import tabmat
...:
...: n = 5_000
...: p = 1_000
...:
...: rng = np.random.default_rng(0)
...: means = rng.exponential(10, p) ** 2
...: stds = rng.exponential(10, p) ** 2
...:
...: X = rng.uniform(size=(n, p)) * stds + means
...:
...: matrix = tabmat.DenseMatrix(X)
...: standardized_matrix1, emp_mean1, emp_std1 = matrix.standardize(np.ones(n) / n, True, True)
...:
...: emp_mean2 = X.mean(axis=0)
...: emp_std2 = X.std(axis=0)
...: X = (X - emp_mean2) / emp_std2
...: standardized_matrix2 = tabmat.DenseMatrix(X)
...:
...: weights = rng.uniform(size=n)
In [2]: %%timeit
...: sandwich1 = standardized_matrix1.sandwich(weights)
...:
...:
50.9 ms ± 2.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [3]: %%timeit
...: sandwich2 = standardized_matrix2.sandwich(weights)
...:
...:
34.2 ms ± 602 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [4]: %%timeit
...: sandwich3 = X.T @ np.diag(weights) @ X
...:
...:
352 ms ± 24.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [5]: sandwich1 = standardized_matrix1.sandwich(weights)
...: sandwich2 = standardized_matrix2.sandwich(weights)
...: sandwich3 = X.T @ np.diag(weights) @ X
...:
...: print(np.max(np.abs(sandwich1 - sandwich2)))
...: print(np.max(np.abs(sandwich1 - sandwich3)))
...: print(np.max(np.abs(sandwich2 - sandwich3)))
0.06973587287713845
0.0697358728771389
8.881784197001252e-16
In [6]: print(np.max(np.abs(sandwich1.T - sandwich1)))
0.0
The results above include #408. I guess that centering the data explicitly results in an additional copy.
Metadata
Metadata
Assignees
Labels
No labels