From dd34ad4c5cd93078937fae8d071590d9ae585ab8 Mon Sep 17 00:00:00 2001 From: Matthias Schmidtblaicher Date: Wed, 14 Feb 2024 20:23:17 +0100 Subject: [PATCH] make all args except X, y, sample_weight, offset keyword only; make initialization keyword only --- src/glum/_glm.py | 157 +++++++++++++++++++++++------------------- src/glum/_glm_cv.py | 6 +- tests/glm/test_glm.py | 6 +- 3 files changed, 94 insertions(+), 75 deletions(-) diff --git a/src/glum/_glm.py b/src/glum/_glm.py index bb9b6ae9..67eb8332 100644 --- a/src/glum/_glm.py +++ b/src/glum/_glm.py @@ -730,6 +730,7 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin): def __init__( self, + *, l1_ratio: float = 0, P1="identity", P2: Union[str, np.ndarray, sparse.spmatrix] = "identity", @@ -1204,7 +1205,7 @@ def _solve_regularization_path( return self.coef_path_ def report_diagnostics( - self, full_report: bool = False, custom_columns: Optional[Iterable] = None + self, *, full_report: bool = False, custom_columns: Optional[Iterable] = None ) -> None: """Print diagnostics to ``stdout``. @@ -1218,7 +1219,9 @@ def report_diagnostics( custom_columns : iterable, optional (default=None) Print only the specified columns. """ - diagnostics = self.get_formatted_diagnostics(full_report, custom_columns) + diagnostics = self.get_formatted_diagnostics( + full_report=full_report, custom_columns=custom_columns + ) if isinstance(diagnostics, str): print(diagnostics) return @@ -1230,9 +1233,9 @@ def report_diagnostics( print(diagnostics) def get_formatted_diagnostics( - self, full_report: bool = False, custom_columns: Optional[Iterable] = None + self, *, full_report: bool = False, custom_columns: Optional[Iterable] = None ) -> Union[str, pd.DataFrame]: - """Get formatted diagnostics; can be printed with _report_diagnostics. + """Get formatted diagnostics which can be printed with report_diagnostics. Parameters ---------- @@ -1290,6 +1293,7 @@ def linear_predictor( self, X: ArrayLike, offset: Optional[ArrayLike] = None, + *, alpha_index: Optional[Union[int, Sequence[int]]] = None, alpha: Optional[Union[float, Sequence[float]]] = None, context: Optional[Union[int, Mapping[str, Any]]] = 0, @@ -1378,6 +1382,7 @@ def predict( X: ShapedArrayLike, sample_weight: Optional[ArrayLike] = None, offset: Optional[ArrayLike] = None, + *, alpha_index: Optional[Union[int, Sequence[int]]] = None, alpha: Optional[Union[float, Sequence[float]]] = None, context: Optional[Union[int, Mapping[str, Any]]] = 0, @@ -1437,12 +1442,13 @@ def predict( def coef_table( self, - confidence_level=0.95, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + *, + confidence_level=0.95, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -1540,16 +1546,17 @@ def coef_table( def wald_test( self, + X=None, + y=None, + sample_weight=None, + offset=None, + *, R: Optional[np.ndarray] = None, features: Optional[Union[str, list[str]]] = None, terms: Optional[Union[str, list[str]]] = None, formula: Optional[str] = None, r: Optional[Sequence] = None, - X=None, - y=None, mu=None, - offset=None, - sample_weight=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -1572,6 +1579,16 @@ def wald_test( Parameters ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features), optional + Training data. Can be omitted if a covariance matrix has already + been computed. + y : array-like, shape (n_samples,), optional + Target values. Can be omitted if a covariance matrix has already + been computed. + sample_weight : array-like, shape (n_samples,), optional, default=None + Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. R : np.ndarray, optional, default=None The restriction matrix representing the linear combination of coefficients to test. @@ -1588,18 +1605,8 @@ def wald_test( The vector representing the values of the linear combination. If None, the test is for whether the linear combinations of the coefficients are zero. - X : {array-like, sparse matrix}, shape (n_samples, n_features), optional - Training data. Can be omitted if a covariance matrix has already - been computed. - y : array-like, shape (n_samples,), optional - Target values. Can be omitted if a covariance matrix has already - been computed. mu : array-like, optional, default=None Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. - sample_weight : array-like, shape (n_samples,), optional, default=None - Individual weights for each sample. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -1647,9 +1654,9 @@ def wald_test( r=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1663,9 +1670,9 @@ def wald_test( values=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1679,9 +1686,9 @@ def wald_test( values=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1696,9 +1703,9 @@ def wald_test( formula=formula, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1714,9 +1721,9 @@ def _wald_test_matrix( r: Optional[np.ndarray] = None, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -1744,12 +1751,12 @@ def _wald_test_matrix( y : array-like, shape (n_samples,), optional Target values. Can be omitted if a covariance matrix has already been computed. - mu : array-like, optional, default=None - Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. sample_weight : array-like, shape (n_samples,), optional, default=None Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. + mu : array-like, optional, default=None + Array with predictions. Estimated if absent. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -1774,9 +1781,9 @@ def _wald_test_matrix( covariance_matrix = self.covariance_matrix( X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1822,9 +1829,9 @@ def _wald_test_feature_names( values: Optional[Sequence] = None, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -1849,12 +1856,12 @@ def _wald_test_feature_names( y : array-like, shape (n_samples,), optional Target values. Can be omitted if a covariance matrix has already been computed. - mu : array-like, optional, default=None - Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. sample_weight : array-like, shape (n_samples,), optional, default=None Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. + mu : array-like, optional, default=None + Array with predictions. Estimated if absent. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -1906,9 +1913,9 @@ def _wald_test_feature_names( r=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -1921,9 +1928,9 @@ def _wald_test_formula( formula: str, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -1945,12 +1952,12 @@ def _wald_test_formula( y : array-like, shape (n_samples,), optional Target values. Can be omitted if a covariance matrix has already been computed. - mu : array-like, optional, default=None - Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. sample_weight : array-like, shape (n_samples,), optional, default=None Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. + mu : array-like, optional, default=None + Array with predictions. Estimated if absent. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -1986,9 +1993,9 @@ def _wald_test_formula( r=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -2002,9 +2009,9 @@ def _wald_test_term_names( values: Optional[Sequence] = None, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -2034,12 +2041,12 @@ def _wald_test_term_names( y : array-like, shape (n_samples,), optional Target values. Can be omitted if a covariance matrix has already been computed. - mu : array-like, optional, default=None - Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. sample_weight : array-like, shape (n_samples,), optional (default=None) Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. + mu : array-like, optional, default=None + Array with predictions. Estimated if absent. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -2101,9 +2108,9 @@ def _wald_test_term_names( r=r, X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -2115,9 +2122,10 @@ def std_errors( self, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + *, + mu=None, dispersion=None, robust=None, clusters: np.ndarray = None, @@ -2138,12 +2146,12 @@ def std_errors( y : array-like, shape (n_samples,), optional Target values. Can be omitted if a covariance matrix has already been computed. - mu : array-like, optional, default=None - Array with predictions. Estimated if absent. - offset : array-like, optional, default=None - Array with additive offsets. sample_weight : array-like, shape (n_samples,), optional, default=None Individual weights for each sample. + offset : array-like, optional, default=None + Array with additive offsets. + mu : array-like, optional, default=None + Array with predictions. Estimated if absent. dispersion : float, optional, default=None The dispersion parameter. Estimated if absent. robust : boolean, optional, default=None @@ -2172,9 +2180,9 @@ def std_errors( self.covariance_matrix( X=X, y=y, - mu=mu, - offset=offset, sample_weight=sample_weight, + offset=offset, + mu=mu, dispersion=dispersion, robust=robust, clusters=clusters, @@ -2188,9 +2196,10 @@ def covariance_matrix( self, X=None, y=None, - mu=None, - offset=None, sample_weight=None, + offset=None, + *, + mu=None, dispersion=None, robust=None, clusters: Optional[np.ndarray] = None, @@ -2480,6 +2489,7 @@ def score( y: ShapedArrayLike, sample_weight: Optional[ArrayLike] = None, offset: Optional[ArrayLike] = None, + *, context: Optional[Union[int, Mapping[str, Any]]] = 0, ): """Compute :math:`D^2`, the percentage of deviance explained. @@ -3238,6 +3248,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase): def __init__( self, + *, alpha=None, l1_ratio=0, P1="identity", @@ -3362,6 +3373,7 @@ def fit( y: Optional[ArrayLike] = None, sample_weight: Optional[ArrayLike] = None, offset: Optional[ArrayLike] = None, + *, store_covariance_matrix: bool = False, clusters: Optional[np.ndarray] = None, # TODO: take out weights_sum (or use it properly) @@ -3707,6 +3719,7 @@ def aic( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, + *, context: Optional[Union[int, Mapping[str, Any]]] = 0, ): """ @@ -3745,6 +3758,7 @@ def aicc( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, + *, context: Optional[Union[int, Mapping[str, Any]]] = 0, ): """ @@ -3790,6 +3804,7 @@ def bic( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, + *, context: Optional[Union[int, Mapping[str, Any]]] = 0, ): """ diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py index 31fd58df..c84fb6f3 100644 --- a/src/glum/_glm_cv.py +++ b/src/glum/_glm_cv.py @@ -322,6 +322,7 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase): def __init__( self, + *, l1_ratio=0, P1="identity", P2="identity", @@ -424,6 +425,7 @@ def fit( y: ArrayLike, sample_weight: Optional[ArrayLike] = None, offset: Optional[ArrayLike] = None, + *, store_covariance_matrix: bool = False, clusters: Optional[np.ndarray] = None, context: Optional[Union[int, Mapping[str, Any]]] = None, @@ -531,7 +533,7 @@ def fit( else: _stype = ["csc", "csr"] - def fit_path( + def _fit_path( self, train_idx, test_idx, @@ -665,7 +667,7 @@ def _get_deviance(coef): return intercept_path_, coef_path_, deviance_path_ jobs = ( - delayed(fit_path)( + delayed(_fit_path)( self, train_idx=train_idx, test_idx=test_idx, diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py index 08080212..4645f2c2 100644 --- a/tests/glm/test_glm.py +++ b/tests/glm/test_glm.py @@ -616,7 +616,9 @@ def test_get_diagnostics( glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept, solver=solver) res = glm.fit(X, y) - diagnostics = res.get_formatted_diagnostics(full_report, custom_columns) + diagnostics = res.get_formatted_diagnostics( + full_report=full_report, custom_columns=custom_columns + ) if solver in ("lbfgs", "trust-constr"): assert diagnostics == "solver does not report diagnostics" else: @@ -678,7 +680,7 @@ def test_report_diagnostics( f = io.StringIO() with redirect_stdout(f): - res.report_diagnostics(full_report, custom_columns) + res.report_diagnostics(full_report=full_report, custom_columns=custom_columns) printed = f.getvalue() # Something should be printed assert len(printed) > 0