No regularization as default (#758)

* set alpha=0 as default
* fix docstring
* add alpha where needed to avoid LinAlgError
* add changelog entry
* also set alpha in golden master
* change name in persisted file too
* set alpha in model_parameters again
* don't modify case of no alpha attribute, which is RegressorCV
* remove invalid alpha argument
* wording
Quantco · Feb 1, 2024 · 137d9fb · 137d9fb
1 parent 6816dad
commit 137d9fb
Show file tree

Hide file tree

Showing 6 changed files with 114 additions and 110 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -10,6 +10,10 @@ Changelog
 3.0.0 - UNRELEASED
 ------------------
 
+**Breaking change:**
+
+- :class:`~glum.GeneralizedLinearRegressor`'s default value for ``alpha`` is now ``0``, i.e. no regularization.
+
 **New features:**
 
 - Added a formula interface for specifying models.

diff --git a/src/glum/_glm.py b/src/glum/_glm.py
@@ -2304,8 +2304,7 @@ def covariance_matrix(
             _expected_information = expected_information
 
         if (
-            (hasattr(self, "alpha") and self.alpha is None)
-            or (
+            (
                 hasattr(self, "alpha")
                 and isinstance(self.alpha, (int, float))
                 and self.alpha > 0
@@ -2914,11 +2913,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
     alpha : {float, array-like}, optional (default=None)
         Constant that multiplies the penalty terms and thus determines the
         regularization strength. If ``alpha_search`` is ``False`` (the default),
-        then ``alpha`` must be a scalar or None (equivalent to ``alpha=1.0``).
+        then ``alpha`` must be a scalar or None (equivalent to ``alpha=0``).
         If ``alpha_search`` is ``True``, then ``alpha`` must be an iterable or
         ``None``. See ``alpha_search`` to find how the regularization path is
         set if ``alpha`` is ``None``. See the notes for the exact mathematical
-        meaning of this parameter. ``alpha = 0`` is equivalent to unpenalized
+        meaning of this parameter. ``alpha=0`` is equivalent to unpenalized
         GLMs. In this case, the design matrix ``X`` must have full column rank
         (no collinearities).
 
@@ -3146,10 +3145,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
 
     drop_first : bool, optional (default = False)
         If ``True``, drop the first column when encoding categorical variables.
-        Set this to True when alpha=0 and solver='auto' to prevent an error due to a
-        singular feature matrix. In the case of using a formula with interactions,
-        setting this argument to ``True`` ensures structural full-rankness (it is
-        equivalent to ``ensure_full_rank`` in formulaic and tabmat).
+        Set this to True when ``alpha=0`` and ``solver='auto'`` to prevent an error
+        due to a singular feature matrix. In the case of using a formula with
+        interactions, setting this argument to ``True`` ensures structural
+        full-rankness (it is equivalent to ``ensure_full_rank`` in formulaic and
+        tabmat).
 
     robust : bool, optional (default = False)
         If true, then robust standard errors are computed by default.
@@ -3573,7 +3573,7 @@ def fit(
                 self.coef_ = self.coef_path_[-1]
         else:
             if self.alpha is None:
-                _alpha = 1.0
+                _alpha = 0.0
             else:
                 _alpha = self.alpha
             if _alpha > 0 and self.l1_ratio > 0 and self._solver != "irls-cd":

diff --git a/tests/glm/golden_master/simulation_gm.json b/tests/glm/golden_master/simulation_gm.json
@@ -1,6 +1,6 @@
 {
   "normal": {
-    "default_weights_offset": {
+    "regularization_weights_offset": {
       "coef_": [
         0.5027665204024282,
         0.23449539956055546,
@@ -36,7 +36,7 @@
       "intercept_": 3.026490229054092,
       "n_iter_": 1
     },
-    "default_weights": {
+    "regularization_weights": {
       "coef_": [
         0.5012056522046088,
         0.23528722263235485,
@@ -72,7 +72,7 @@
       "intercept_": 2.0279948791150764,
       "n_iter_": 1
     },
-    "default_offset": {
+    "regularization_offset": {
       "coef_": [
         0.49784759015593427,
         0.23166926058137094,
@@ -108,7 +108,7 @@
       "intercept_": 2.981778440705444,
       "n_iter_": 1
     },
-    "default": {
+    "regularization": {
       "coef_": [
         0.4985676422254175,
         0.22818569911229844,
@@ -1478,7 +1478,7 @@
     }
   },
   "poisson": {
-    "default_weights_offset": {
+    "regularization_weights_offset": {
       "coef_": [
         0.9604408672344522,
         0.4432562524921413,
@@ -1514,7 +1514,7 @@
       "intercept_": 1.8189178943867188,
       "n_iter_": 6
     },
-    "default_weights": {
+    "regularization_weights": {
       "coef_": [
         0.9817372866211753,
         0.49117907395980553,
@@ -1550,7 +1550,7 @@
       "intercept_": 1.157828764208921,
       "n_iter_": 6
     },
-    "default_offset": {
+    "regularization_offset": {
       "coef_": [
         0.9693196874148616,
         0.46707910961062293,
@@ -1586,7 +1586,7 @@
       "intercept_": 1.8396971485658087,
       "n_iter_": 6
     },
-    "default": {
+    "regularization": {
       "coef_": [
         0.9821298947770232,
         0.4937841900606277,
@@ -2812,7 +2812,7 @@
     }
   },
   "gamma": {
-    "default_weights_offset": {
+    "regularization_weights_offset": {
       "coef_": [
         0.4866808417045077,
         0.1370793228217412,
@@ -2848,7 +2848,7 @@
       "intercept_": 5.268950639816242,
       "n_iter_": 4
     },
-    "default_weights": {
+    "regularization_weights": {
       "coef_": [
         0.48972345202083134,
         0.24707128799109493,
@@ -2884,7 +2884,7 @@
       "intercept_": 2.512993119536852,
       "n_iter_": 4
     },
-    "default_offset": {
+    "regularization_offset": {
       "coef_": [
         0.5107634971640694,
         0.1783139942111257,
@@ -2920,7 +2920,7 @@
       "intercept_": 5.272870219406924,
       "n_iter_": 4
     },
-    "default": {
+    "regularization": {
       "coef_": [
         0.4966531683982075,
         0.24896254652599858,
@@ -4146,7 +4146,7 @@
     }
   },
   "tweedie_p=1.5": {
-    "default_weights_offset": {
+    "regularization_weights_offset": {
       "coef_": [
         0.8740584736837378,
         0.39026903329437757,
@@ -4182,7 +4182,7 @@
       "intercept_": 2.8380327257627473,
       "n_iter_": 4
     },
-    "default_weights": {
+    "regularization_weights": {
       "coef_": [
         0.8592854961617753,
         0.42694459825027725,
@@ -4218,7 +4218,7 @@
       "intercept_": 1.6496674803774887,
       "n_iter_": 4
     },
-    "default_offset": {
+    "regularization_offset": {
       "coef_": [
         0.8763610403720393,
         0.4023951463085115,
@@ -4254,7 +4254,7 @@
       "intercept_": 2.7855262434295343,
       "n_iter_": 4
     },
-    "default": {
+    "regularization": {
       "coef_": [
         0.860178238544325,
         0.43000049156945763,
@@ -5480,7 +5480,7 @@
     }
   },
   "binomial": {
-    "default_weights_offset": {
+    "regularization_weights_offset": {
       "coef_": [
         0.0645115293284631,
         0.03563706184469416,
@@ -5516,7 +5516,7 @@
       "intercept_": 3.3761974509366994,
       "n_iter_": 3
     },
-    "default_weights": {
+    "regularization_weights": {
       "coef_": [
         0.06396142685405831,
         0.03544619397195947,
@@ -5552,7 +5552,7 @@
       "intercept_": 2.007458821879875,
       "n_iter_": 2
     },
-    "default_offset": {
+    "regularization_offset": {
       "coef_": [
         0.059850128940604715,
         0.029620907232596274,
@@ -5588,7 +5588,7 @@
       "intercept_": 3.4202998674202676,
       "n_iter_": 3
     },
-    "default": {
+    "regularization": {
       "coef_": [
         0.05979957149348005,
         0.03233408720147587,

diff --git a/tests/glm/test_distribution.py b/tests/glm/test_distribution.py
@@ -296,7 +296,6 @@ def test_poisson_deviance_dispersion_loglihood(weighted):
     # logLik(glm_model)  # -7.390977 (df=1)
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family="poisson",
         fit_intercept=False,
         gradient_tol=1e-8,
@@ -345,7 +344,6 @@ def test_gamma_deviance_dispersion_loglihood(weighted):
     # logLik(glm_model)  # -7.057068 (df=2)
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family="gamma",
         fit_intercept=False,
         gradient_tol=1e-8,
@@ -393,7 +391,6 @@ def test_gaussian_deviance_dispersion_loglihood(family, weighted):
     # logLik(glm_model)  # -7.863404 (df=2)
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family=family,
         fit_intercept=False,
         gradient_tol=1e-8,
@@ -441,7 +438,6 @@ def test_tweedie_deviance_dispersion_loglihood(weighted):
     # logLiktweedie(glm_model)  # -8.35485
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family=TweedieDistribution(1.5),
         fit_intercept=False,
         gradient_tol=1e-8,
@@ -490,7 +486,6 @@ def test_binomial_deviance_dispersion_loglihood(weighted):
     # logLik(glm_model)  # -3.365058 (df=1)
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family="binomial",
         fit_intercept=False,
         gradient_tol=1e-8,
@@ -535,7 +530,6 @@ def test_negative_binomial_deviance_dispersion_loglihood(weighted):
     # logLik(glm_model)  # -4.187887 (df=1)
 
     regressor = GeneralizedLinearRegressor(
-        alpha=0,
         family="negative.binomial",
         fit_intercept=False,
         gradient_tol=1e-8,