Extend the external-predictions sensitivity benchmarking test to multiple repetitions (n_rep)

SvenKlaassen · SvenKlaassen · commit d9807a85ebf4 · 2024-04-11T16:56:15.000+02:00
diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
@@ -1,13 +1,11 @@
 import pytest
-import math
 import numpy as np
 import copy
 
 import doubleml as dml
 from doubleml import DoubleMLIRM, DoubleMLData
 from doubleml.datasets import make_irm_data
-from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.linear_model import LinearRegression, LogisticRegression
 
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_manual, \
     doubleml_sensitivity_benchmark_manual
@@ -114,18 +112,19 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
 @pytest.fixture(scope="module")
 def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     random_state = 42
-    x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
+    x, y, d = make_irm_data(n_obs=50, dim_x=5, theta=0, return_type="np.array")
 
-    classifier_class = RandomForestClassifier
-    regressor_class = RandomForestRegressor
+    classifier_class = LogisticRegression
+    regressor_class = LinearRegression
 
     np.random.seed(3141)
     dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
     x_list_long = copy.deepcopy(dml_data.x_cols)
     dml_int = DoubleMLIRM(dml_data,
                           ml_m=classifier_class(random_state=random_state),
-                          ml_g=regressor_class(random_state=random_state),
-                          n_folds=2)
+                          ml_g=regressor_class(),
+                          n_folds=2,
+                          n_rep=n_rep)
     dml_int.fit(store_predictions=True)
     dml_int.sensitivity_analysis()
     dml_ext = copy.deepcopy(dml_int)
@@ -136,8 +135,9 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
     dml_short = DoubleMLIRM(dml_data_short,
                             ml_m=classifier_class(random_state=random_state),
-                            ml_g=regressor_class(random_state=random_state),
-                            n_folds=2)
+                            ml_g=regressor_class(),
+                            n_folds=2,
+                            n_rep=n_rep)
     dml_short.fit(store_predictions=True)
     fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
                                                "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
@@ -148,15 +148,15 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     dml_ext.sensitivity_analysis()
     df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set, fit_args=fit_args)
 
-    res_dict = {"default_benchmark": df_bm.loc["d", "delta_theta"],
-                "external_benchmark": df_bm_ext.loc["d", "delta_theta"]}
+    res_dict = {"default_benchmark": df_bm,
+                "external_benchmark": df_bm_ext}
 
     return res_dict
 
 
 @pytest.mark.ci
 def test_dml_sensitivity_external_predictions(test_dml_benchmark_fixture):
-    assert math.isclose(test_dml_benchmark_fixture["default_benchmark"],
-                        test_dml_benchmark_fixture["external_benchmark"],
-                        rel_tol=1e-9,
-                        abs_tol=1e-4)
+    assert np.allclose(test_dml_benchmark_fixture["default_benchmark"],
+                       test_dml_benchmark_fixture["external_benchmark"],
+                       rtol=1e-9,
+                       atol=1e-4)