From dd34ad4c5cd93078937fae8d071590d9ae585ab8 Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Wed, 14 Feb 2024 20:23:17 +0100
Subject: [PATCH] Make all args except X, y, sample_weight, offset
 keyword-only; make initialization keyword-only

---
 src/glum/_glm.py      | 157 +++++++++++++++++++++++-------------------
 src/glum/_glm_cv.py   |   6 +-
 tests/glm/test_glm.py |   6 +-
 3 files changed, 94 insertions(+), 75 deletions(-)

diff --git a/src/glum/_glm.py b/src/glum/_glm.py
index bb9b6ae9..67eb8332 100644
--- a/src/glum/_glm.py
+++ b/src/glum/_glm.py
@@ -730,6 +730,7 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin):
 
     def __init__(
         self,
+        *,
         l1_ratio: float = 0,
         P1="identity",
         P2: Union[str, np.ndarray, sparse.spmatrix] = "identity",
@@ -1204,7 +1205,7 @@ def _solve_regularization_path(
         return self.coef_path_
 
     def report_diagnostics(
-        self, full_report: bool = False, custom_columns: Optional[Iterable] = None
+        self, *, full_report: bool = False, custom_columns: Optional[Iterable] = None
     ) -> None:
         """Print diagnostics to ``stdout``.
 
@@ -1218,7 +1219,9 @@ def report_diagnostics(
         custom_columns : iterable, optional (default=None)
             Print only the specified columns.
         """
-        diagnostics = self.get_formatted_diagnostics(full_report, custom_columns)
+        diagnostics = self.get_formatted_diagnostics(
+            full_report=full_report, custom_columns=custom_columns
+        )
         if isinstance(diagnostics, str):
             print(diagnostics)
             return
@@ -1230,9 +1233,9 @@ def report_diagnostics(
             print(diagnostics)
 
     def get_formatted_diagnostics(
-        self, full_report: bool = False, custom_columns: Optional[Iterable] = None
+        self, *, full_report: bool = False, custom_columns: Optional[Iterable] = None
     ) -> Union[str, pd.DataFrame]:
-        """Get formatted diagnostics; can be printed with _report_diagnostics.
+        """Get formatted diagnostics which can be printed with report_diagnostics.
 
         Parameters
         ----------
@@ -1290,6 +1293,7 @@ def linear_predictor(
         self,
         X: ArrayLike,
         offset: Optional[ArrayLike] = None,
+        *,
         alpha_index: Optional[Union[int, Sequence[int]]] = None,
         alpha: Optional[Union[float, Sequence[float]]] = None,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
@@ -1378,6 +1382,7 @@ def predict(
         X: ShapedArrayLike,
         sample_weight: Optional[ArrayLike] = None,
         offset: Optional[ArrayLike] = None,
+        *,
         alpha_index: Optional[Union[int, Sequence[int]]] = None,
         alpha: Optional[Union[float, Sequence[float]]] = None,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
@@ -1437,12 +1442,13 @@ def predict(
 
     def coef_table(
         self,
-        confidence_level=0.95,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        *,
+        confidence_level=0.95,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -1540,16 +1546,17 @@ def coef_table(
 
     def wald_test(
         self,
+        X=None,
+        y=None,
+        sample_weight=None,
+        offset=None,
+        *,
         R: Optional[np.ndarray] = None,
         features: Optional[Union[str, list[str]]] = None,
         terms: Optional[Union[str, list[str]]] = None,
         formula: Optional[str] = None,
         r: Optional[Sequence] = None,
-        X=None,
-        y=None,
         mu=None,
-        offset=None,
-        sample_weight=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -1572,6 +1579,16 @@ def wald_test(
 
         Parameters
         ----------
+        X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
+            Training data. Can be omitted if a covariance matrix has already
+            been computed.
+        y : array-like, shape (n_samples,), optional
+            Target values. Can be omitted if a covariance matrix has already
+            been computed.
+        sample_weight : array-like, shape (n_samples,), optional, default=None
+            Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
         R : np.ndarray, optional, default=None
             The restriction matrix representing the linear combination of coefficients
             to test.
@@ -1588,18 +1605,8 @@ def wald_test(
             The vector representing the values of the linear combination.
             If None, the test is for whether the linear combinations of the coefficients
             are zero.
-        X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
-            Training data. Can be omitted if a covariance matrix has already
-            been computed.
-        y : array-like, shape (n_samples,), optional
-            Target values. Can be omitted if a covariance matrix has already
-            been computed.
         mu : array-like, optional, default=None
             Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
-        sample_weight : array-like, shape (n_samples,), optional, default=None
-            Individual weights for each sample.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -1647,9 +1654,9 @@ def wald_test(
                 r=r,
                 X=X,
                 y=y,
-                mu=mu,
-                offset=offset,
                 sample_weight=sample_weight,
+                offset=offset,
+                mu=mu,
                 dispersion=dispersion,
                 robust=robust,
                 clusters=clusters,
@@ -1663,9 +1670,9 @@ def wald_test(
                 values=r,
                 X=X,
                 y=y,
-                mu=mu,
-                offset=offset,
                 sample_weight=sample_weight,
+                offset=offset,
+                mu=mu,
                 dispersion=dispersion,
                 robust=robust,
                 clusters=clusters,
@@ -1679,9 +1686,9 @@ def wald_test(
                 values=r,
                 X=X,
                 y=y,
-                mu=mu,
-                offset=offset,
                 sample_weight=sample_weight,
+                offset=offset,
+                mu=mu,
                 dispersion=dispersion,
                 robust=robust,
                 clusters=clusters,
@@ -1696,9 +1703,9 @@ def wald_test(
                 formula=formula,
                 X=X,
                 y=y,
-                mu=mu,
-                offset=offset,
                 sample_weight=sample_weight,
+                offset=offset,
+                mu=mu,
                 dispersion=dispersion,
                 robust=robust,
                 clusters=clusters,
@@ -1714,9 +1721,9 @@ def _wald_test_matrix(
         r: Optional[np.ndarray] = None,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -1744,12 +1751,12 @@ def _wald_test_matrix(
         y : array-like, shape (n_samples,), optional
             Target values. Can be omitted if a covariance matrix has already
             been computed.
-        mu : array-like, optional, default=None
-            Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
         sample_weight : array-like, shape (n_samples,), optional, default=None
             Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
+        mu : array-like, optional, default=None
+            Array with predictions. Estimated if absent.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -1774,9 +1781,9 @@ def _wald_test_matrix(
         covariance_matrix = self.covariance_matrix(
             X=X,
             y=y,
-            mu=mu,
-            offset=offset,
             sample_weight=sample_weight,
+            offset=offset,
+            mu=mu,
             dispersion=dispersion,
             robust=robust,
             clusters=clusters,
@@ -1822,9 +1829,9 @@ def _wald_test_feature_names(
         values: Optional[Sequence] = None,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -1849,12 +1856,12 @@ def _wald_test_feature_names(
         y : array-like, shape (n_samples,), optional
             Target values. Can be omitted if a covariance matrix has already
             been computed.
-        mu : array-like, optional, default=None
-            Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
         sample_weight : array-like, shape (n_samples,), optional, default=None
             Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
+        mu : array-like, optional, default=None
+            Array with predictions. Estimated if absent.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -1906,9 +1913,9 @@ def _wald_test_feature_names(
             r=r,
             X=X,
             y=y,
-            mu=mu,
-            offset=offset,
             sample_weight=sample_weight,
+            offset=offset,
+            mu=mu,
             dispersion=dispersion,
             robust=robust,
             clusters=clusters,
@@ -1921,9 +1928,9 @@ def _wald_test_formula(
         formula: str,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -1945,12 +1952,12 @@ def _wald_test_formula(
         y : array-like, shape (n_samples,), optional
             Target values. Can be omitted if a covariance matrix has already
             been computed.
-        mu : array-like, optional, default=None
-            Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
         sample_weight : array-like, shape (n_samples,), optional, default=None
             Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
+        mu : array-like, optional, default=None
+            Array with predictions. Estimated if absent.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -1986,9 +1993,9 @@ def _wald_test_formula(
             r=r,
             X=X,
             y=y,
-            mu=mu,
-            offset=offset,
             sample_weight=sample_weight,
+            offset=offset,
+            mu=mu,
             dispersion=dispersion,
             robust=robust,
             clusters=clusters,
@@ -2002,9 +2009,9 @@ def _wald_test_term_names(
         values: Optional[Sequence] = None,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -2034,12 +2041,12 @@ def _wald_test_term_names(
         y : array-like, shape (n_samples,), optional
             Target values. Can be omitted if a covariance matrix has already
             been computed.
-        mu : array-like, optional, default=None
-            Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
         sample_weight : array-like, shape (n_samples,), optional (default=None)
             Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
+        mu : array-like, optional, default=None
+            Array with predictions. Estimated if absent.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -2101,9 +2108,9 @@ def _wald_test_term_names(
             r=r,
             X=X,
             y=y,
-            mu=mu,
-            offset=offset,
             sample_weight=sample_weight,
+            offset=offset,
+            mu=mu,
             dispersion=dispersion,
             robust=robust,
             clusters=clusters,
@@ -2115,9 +2122,10 @@ def std_errors(
         self,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        *,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: np.ndarray = None,
@@ -2138,12 +2146,12 @@ def std_errors(
         y : array-like, shape (n_samples,), optional
             Target values. Can be omitted if a covariance matrix has already
             been computed.
-        mu : array-like, optional, default=None
-            Array with predictions. Estimated if absent.
-        offset : array-like, optional, default=None
-            Array with additive offsets.
         sample_weight : array-like, shape (n_samples,), optional, default=None
             Individual weights for each sample.
+        offset : array-like, optional, default=None
+            Array with additive offsets.
+        mu : array-like, optional, default=None
+            Array with predictions. Estimated if absent.
         dispersion : float, optional, default=None
             The dispersion parameter. Estimated if absent.
         robust : boolean, optional, default=None
@@ -2172,9 +2180,9 @@ def std_errors(
             self.covariance_matrix(
                 X=X,
                 y=y,
-                mu=mu,
-                offset=offset,
                 sample_weight=sample_weight,
+                offset=offset,
+                mu=mu,
                 dispersion=dispersion,
                 robust=robust,
                 clusters=clusters,
@@ -2188,9 +2196,10 @@ def covariance_matrix(
         self,
         X=None,
         y=None,
-        mu=None,
-        offset=None,
         sample_weight=None,
+        offset=None,
+        *,
+        mu=None,
         dispersion=None,
         robust=None,
         clusters: Optional[np.ndarray] = None,
@@ -2480,6 +2489,7 @@ def score(
         y: ShapedArrayLike,
         sample_weight: Optional[ArrayLike] = None,
         offset: Optional[ArrayLike] = None,
+        *,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
     ):
         """Compute :math:`D^2`, the percentage of deviance explained.
@@ -3238,6 +3248,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
 
     def __init__(
         self,
+        *,
         alpha=None,
         l1_ratio=0,
         P1="identity",
@@ -3362,6 +3373,7 @@ def fit(
         y: Optional[ArrayLike] = None,
         sample_weight: Optional[ArrayLike] = None,
         offset: Optional[ArrayLike] = None,
+        *,
         store_covariance_matrix: bool = False,
         clusters: Optional[np.ndarray] = None,
         # TODO: take out weights_sum (or use it properly)
@@ -3707,6 +3719,7 @@ def aic(
         X: ArrayLike,
         y: ArrayLike,
         sample_weight: Optional[ArrayLike] = None,
+        *,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
     ):
         """
@@ -3745,6 +3758,7 @@ def aicc(
         X: ArrayLike,
         y: ArrayLike,
         sample_weight: Optional[ArrayLike] = None,
+        *,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
     ):
         """
@@ -3790,6 +3804,7 @@ def bic(
         X: ArrayLike,
         y: ArrayLike,
         sample_weight: Optional[ArrayLike] = None,
+        *,
         context: Optional[Union[int, Mapping[str, Any]]] = 0,
     ):
         """
diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
index 31fd58df..c84fb6f3 100644
--- a/src/glum/_glm_cv.py
+++ b/src/glum/_glm_cv.py
@@ -322,6 +322,7 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
 
     def __init__(
         self,
+        *,
         l1_ratio=0,
         P1="identity",
         P2="identity",
@@ -424,6 +425,7 @@ def fit(
         y: ArrayLike,
         sample_weight: Optional[ArrayLike] = None,
         offset: Optional[ArrayLike] = None,
+        *,
         store_covariance_matrix: bool = False,
         clusters: Optional[np.ndarray] = None,
         context: Optional[Union[int, Mapping[str, Any]]] = None,
@@ -531,7 +533,7 @@ def fit(
         else:
             _stype = ["csc", "csr"]
 
-        def fit_path(
+        def _fit_path(
             self,
             train_idx,
             test_idx,
@@ -665,7 +667,7 @@ def _get_deviance(coef):
             return intercept_path_, coef_path_, deviance_path_
 
         jobs = (
-            delayed(fit_path)(
+            delayed(_fit_path)(
                 self,
                 train_idx=train_idx,
                 test_idx=test_idx,
diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
index 08080212..4645f2c2 100644
--- a/tests/glm/test_glm.py
+++ b/tests/glm/test_glm.py
@@ -616,7 +616,9 @@ def test_get_diagnostics(
     glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept, solver=solver)
     res = glm.fit(X, y)
 
-    diagnostics = res.get_formatted_diagnostics(full_report, custom_columns)
+    diagnostics = res.get_formatted_diagnostics(
+        full_report=full_report, custom_columns=custom_columns
+    )
     if solver in ("lbfgs", "trust-constr"):
         assert diagnostics == "solver does not report diagnostics"
     else:
@@ -678,7 +680,7 @@ def test_report_diagnostics(
 
     f = io.StringIO()
     with redirect_stdout(f):
-        res.report_diagnostics(full_report, custom_columns)
+        res.report_diagnostics(full_report=full_report, custom_columns=custom_columns)
     printed = f.getvalue()
     # Something should be printed
     assert len(printed) > 0