Skip to content

Commit

Permalink
No regularization as default (#758)
Browse files Browse the repository at this point in the history
* set alpha=0 as default

* fix docstring

* add alpha where needed to avoid LinAlgError

* add changelog entry

* also set alpha in golden master

* change name in persisted file too

* set alpha in model_parameters again

* don't modify case of no alpha attribute, which is RegressorCV

* remove invalid alpha argument

* wording
  • Loading branch information
MatthiasSchmidtblaicherQC authored Feb 1, 2024
1 parent 6816dad commit 137d9fb
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 110 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Changelog
3.0.0 - UNRELEASED
------------------

**Breaking change:**

- :class:`~glum.GeneralizedLinearRegressor`'s default value for ``alpha`` is now ``0``, i.e. no regularization.

**New features:**

- Added a formula interface for specifying models.
Expand Down
18 changes: 9 additions & 9 deletions src/glum/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2304,8 +2304,7 @@ def covariance_matrix(
_expected_information = expected_information

if (
(hasattr(self, "alpha") and self.alpha is None)
or (
(
hasattr(self, "alpha")
and isinstance(self.alpha, (int, float))
and self.alpha > 0
Expand Down Expand Up @@ -2914,11 +2913,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
alpha : {float, array-like}, optional (default=None)
Constant that multiplies the penalty terms and thus determines the
regularization strength. If ``alpha_search`` is ``False`` (the default),
then ``alpha`` must be a scalar or None (equivalent to ``alpha=1.0``).
then ``alpha`` must be a scalar or ``None`` (equivalent to ``alpha=0``).
If ``alpha_search`` is ``True``, then ``alpha`` must be an iterable or
``None``. See ``alpha_search`` to find how the regularization path is
set if ``alpha`` is ``None``. See the notes for the exact mathematical
meaning of this parameter. ``alpha = 0`` is equivalent to unpenalized
meaning of this parameter. ``alpha=0`` is equivalent to unpenalized
GLMs. In this case, the design matrix ``X`` must have full column rank
(no collinearities).
Expand Down Expand Up @@ -3146,10 +3145,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
drop_first : bool, optional (default = False)
If ``True``, drop the first column when encoding categorical variables.
Set this to True when alpha=0 and solver='auto' to prevent an error due to a
singular feature matrix. In the case of using a formula with interactions,
setting this argument to ``True`` ensures structural full-rankness (it is
equivalent to ``ensure_full_rank`` in formulaic and tabmat).
Set this to True when ``alpha=0`` and ``solver='auto'`` to prevent an error
due to a singular feature matrix. In the case of using a formula with
interactions, setting this argument to ``True`` ensures structural
full-rankness (it is equivalent to ``ensure_full_rank`` in formulaic and
tabmat).
robust : bool, optional (default = False)
If ``True``, then robust standard errors are computed by default.
Expand Down Expand Up @@ -3573,7 +3573,7 @@ def fit(
self.coef_ = self.coef_path_[-1]
else:
if self.alpha is None:
_alpha = 1.0
_alpha = 0.0
else:
_alpha = self.alpha
if _alpha > 0 and self.l1_ratio > 0 and self._solver != "irls-cd":
Expand Down
40 changes: 20 additions & 20 deletions tests/glm/golden_master/simulation_gm.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"normal": {
"default_weights_offset": {
"regularization_weights_offset": {
"coef_": [
0.5027665204024282,
0.23449539956055546,
Expand Down Expand Up @@ -36,7 +36,7 @@
"intercept_": 3.026490229054092,
"n_iter_": 1
},
"default_weights": {
"regularization_weights": {
"coef_": [
0.5012056522046088,
0.23528722263235485,
Expand Down Expand Up @@ -72,7 +72,7 @@
"intercept_": 2.0279948791150764,
"n_iter_": 1
},
"default_offset": {
"regularization_offset": {
"coef_": [
0.49784759015593427,
0.23166926058137094,
Expand Down Expand Up @@ -108,7 +108,7 @@
"intercept_": 2.981778440705444,
"n_iter_": 1
},
"default": {
"regularization": {
"coef_": [
0.4985676422254175,
0.22818569911229844,
Expand Down Expand Up @@ -1478,7 +1478,7 @@
}
},
"poisson": {
"default_weights_offset": {
"regularization_weights_offset": {
"coef_": [
0.9604408672344522,
0.4432562524921413,
Expand Down Expand Up @@ -1514,7 +1514,7 @@
"intercept_": 1.8189178943867188,
"n_iter_": 6
},
"default_weights": {
"regularization_weights": {
"coef_": [
0.9817372866211753,
0.49117907395980553,
Expand Down Expand Up @@ -1550,7 +1550,7 @@
"intercept_": 1.157828764208921,
"n_iter_": 6
},
"default_offset": {
"regularization_offset": {
"coef_": [
0.9693196874148616,
0.46707910961062293,
Expand Down Expand Up @@ -1586,7 +1586,7 @@
"intercept_": 1.8396971485658087,
"n_iter_": 6
},
"default": {
"regularization": {
"coef_": [
0.9821298947770232,
0.4937841900606277,
Expand Down Expand Up @@ -2812,7 +2812,7 @@
}
},
"gamma": {
"default_weights_offset": {
"regularization_weights_offset": {
"coef_": [
0.4866808417045077,
0.1370793228217412,
Expand Down Expand Up @@ -2848,7 +2848,7 @@
"intercept_": 5.268950639816242,
"n_iter_": 4
},
"default_weights": {
"regularization_weights": {
"coef_": [
0.48972345202083134,
0.24707128799109493,
Expand Down Expand Up @@ -2884,7 +2884,7 @@
"intercept_": 2.512993119536852,
"n_iter_": 4
},
"default_offset": {
"regularization_offset": {
"coef_": [
0.5107634971640694,
0.1783139942111257,
Expand Down Expand Up @@ -2920,7 +2920,7 @@
"intercept_": 5.272870219406924,
"n_iter_": 4
},
"default": {
"regularization": {
"coef_": [
0.4966531683982075,
0.24896254652599858,
Expand Down Expand Up @@ -4146,7 +4146,7 @@
}
},
"tweedie_p=1.5": {
"default_weights_offset": {
"regularization_weights_offset": {
"coef_": [
0.8740584736837378,
0.39026903329437757,
Expand Down Expand Up @@ -4182,7 +4182,7 @@
"intercept_": 2.8380327257627473,
"n_iter_": 4
},
"default_weights": {
"regularization_weights": {
"coef_": [
0.8592854961617753,
0.42694459825027725,
Expand Down Expand Up @@ -4218,7 +4218,7 @@
"intercept_": 1.6496674803774887,
"n_iter_": 4
},
"default_offset": {
"regularization_offset": {
"coef_": [
0.8763610403720393,
0.4023951463085115,
Expand Down Expand Up @@ -4254,7 +4254,7 @@
"intercept_": 2.7855262434295343,
"n_iter_": 4
},
"default": {
"regularization": {
"coef_": [
0.860178238544325,
0.43000049156945763,
Expand Down Expand Up @@ -5480,7 +5480,7 @@
}
},
"binomial": {
"default_weights_offset": {
"regularization_weights_offset": {
"coef_": [
0.0645115293284631,
0.03563706184469416,
Expand Down Expand Up @@ -5516,7 +5516,7 @@
"intercept_": 3.3761974509366994,
"n_iter_": 3
},
"default_weights": {
"regularization_weights": {
"coef_": [
0.06396142685405831,
0.03544619397195947,
Expand Down Expand Up @@ -5552,7 +5552,7 @@
"intercept_": 2.007458821879875,
"n_iter_": 2
},
"default_offset": {
"regularization_offset": {
"coef_": [
0.059850128940604715,
0.029620907232596274,
Expand Down Expand Up @@ -5588,7 +5588,7 @@
"intercept_": 3.4202998674202676,
"n_iter_": 3
},
"default": {
"regularization": {
"coef_": [
0.05979957149348005,
0.03233408720147587,
Expand Down
6 changes: 0 additions & 6 deletions tests/glm/test_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ def test_poisson_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -7.390977 (df=1)

regressor = GeneralizedLinearRegressor(
alpha=0,
family="poisson",
fit_intercept=False,
gradient_tol=1e-8,
Expand Down Expand Up @@ -345,7 +344,6 @@ def test_gamma_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -7.057068 (df=2)

regressor = GeneralizedLinearRegressor(
alpha=0,
family="gamma",
fit_intercept=False,
gradient_tol=1e-8,
Expand Down Expand Up @@ -393,7 +391,6 @@ def test_gaussian_deviance_dispersion_loglihood(family, weighted):
# logLik(glm_model) # -7.863404 (df=2)

regressor = GeneralizedLinearRegressor(
alpha=0,
family=family,
fit_intercept=False,
gradient_tol=1e-8,
Expand Down Expand Up @@ -441,7 +438,6 @@ def test_tweedie_deviance_dispersion_loglihood(weighted):
# logLiktweedie(glm_model) # -8.35485

regressor = GeneralizedLinearRegressor(
alpha=0,
family=TweedieDistribution(1.5),
fit_intercept=False,
gradient_tol=1e-8,
Expand Down Expand Up @@ -490,7 +486,6 @@ def test_binomial_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -3.365058 (df=1)

regressor = GeneralizedLinearRegressor(
alpha=0,
family="binomial",
fit_intercept=False,
gradient_tol=1e-8,
Expand Down Expand Up @@ -535,7 +530,6 @@ def test_negative_binomial_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -4.187887 (df=1)

regressor = GeneralizedLinearRegressor(
alpha=0,
family="negative.binomial",
fit_intercept=False,
gradient_tol=1e-8,
Expand Down
Loading

0 comments on commit 137d9fb

Please sign in to comment.