No regularization as default #758

Merged · 10 commits · Feb 1, 2024
4 changes: 4 additions & 0 deletions CHANGELOG.rst
@@ -10,6 +10,10 @@ Changelog
3.0.0 - UNRELEASED
------------------

**Breaking change:**

- :class:`~glum.GeneralizedLinearRegressor`'s default value for ``alpha`` is now ``0``, i.e. no regularization.

**New features:**

- Added a formula interface for specifying models.
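For readers skimming the changelog, a minimal sketch of what the new default means in practice. The data and variable names are illustrative, and this assumes glum 3.x with this PR merged:

```python
import numpy as np
from glum import GeneralizedLinearRegressor

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.standard_normal(100)

# As of 3.0.0, omitting ``alpha`` means no regularization, so these two
# fits should coincide. Before this change, the default was ``alpha=1.0``.
default_fit = GeneralizedLinearRegressor().fit(X, y)
unpenalized_fit = GeneralizedLinearRegressor(alpha=0).fit(X, y)
np.testing.assert_allclose(default_fit.coef_, unpenalized_fit.coef_)
```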
18 changes: 9 additions & 9 deletions src/glum/_glm.py
@@ -2304,8 +2304,7 @@ def covariance_matrix(
_expected_information = expected_information

if (
- (hasattr(self, "alpha") and self.alpha is None)
- or (
+ (
hasattr(self, "alpha")
and isinstance(self.alpha, (int, float))
and self.alpha > 0
@@ -2914,11 +2913,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
alpha : {float, array-like}, optional (default=None)
Constant that multiplies the penalty terms and thus determines the
regularization strength. If ``alpha_search`` is ``False`` (the default),
- then ``alpha`` must be a scalar or None (equivalent to ``alpha=1.0``).
+ then ``alpha`` must be a scalar or None (equivalent to ``alpha=0``).
If ``alpha_search`` is ``True``, then ``alpha`` must be an iterable or
``None``. See ``alpha_search`` to find how the regularization path is
set if ``alpha`` is ``None``. See the notes for the exact mathematical
- meaning of this parameter. ``alpha = 0`` is equivalent to unpenalized
+ meaning of this parameter. ``alpha=0`` is equivalent to unpenalized
GLMs. In this case, the design matrix ``X`` must have full column rank
(no collinearities).

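A hedged illustration of the scalar-versus-iterable contract described in the docstring above. The grid values and variable names are arbitrary:

```python
from glum import GeneralizedLinearRegressor

# Scalar alpha with alpha_search=False (the default): a single penalized fit.
single_fit = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0.5)

# Iterable alpha with alpha_search=True: fit along a regularization path.
# Passing ``alpha=None`` here instead would let glum derive the path itself.
path_fit = GeneralizedLinearRegressor(alpha=[1.0, 0.1, 0.01], alpha_search=True)
```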
@@ -3146,10 +3145,11 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):

drop_first : bool, optional (default = False)
If ``True``, drop the first column when encoding categorical variables.
- Set this to True when alpha=0 and solver='auto' to prevent an error due to a
- singular feature matrix. In the case of using a formula with interactions,
- setting this argument to ``True`` ensures structural full-rankness (it is
- equivalent to ``ensure_full_rank`` in formulaic and tabmat).
+ Set this to True when ``alpha=0`` and ``solver='auto'`` to prevent an error
+ due to a singular feature matrix. In the case of using a formula with
+ interactions, setting this argument to ``True`` ensures structural
+ full-rankness (it is equivalent to ``ensure_full_rank`` in formulaic and
+ tabmat).

robust : bool, optional (default = False)
If true, then robust standard errors are computed by default.
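A minimal sketch of the ``drop_first``/``alpha=0`` interaction documented in this hunk. The toy DataFrame and names are illustrative:

```python
import pandas as pd
from glum import GeneralizedLinearRegressor

df = pd.DataFrame(
    {
        "cat": pd.Categorical(["a", "b", "c", "a", "b", "c"]),
        "x": [0.1, 0.5, 0.2, 0.8, 0.3, 0.9],
    }
)
y = [1.0, 2.0, 3.0, 1.5, 2.5, 3.5]

# With an intercept plus a full one-hot encoding, the design matrix is
# rank-deficient, which an unpenalized fit cannot tolerate; dropping the
# first level of each categorical restores full column rank.
model = GeneralizedLinearRegressor(alpha=0, drop_first=True).fit(df, y)
```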
@@ -3573,7 +3573,7 @@ def fit(
self.coef_ = self.coef_path_[-1]
else:
if self.alpha is None:
- _alpha = 1.0
+ _alpha = 0.0
else:
_alpha = self.alpha
if _alpha > 0 and self.l1_ratio > 0 and self._solver != "irls-cd":
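A condensed, standalone sketch of the default resolution the hunk above implements. ``resolve_alpha`` is a hypothetical helper, not part of glum's API:

```python
def resolve_alpha(alpha=None):
    # Hypothetical sketch: ``None`` now resolves to 0.0 (no penalty);
    # before this PR it resolved to 1.0.
    return 0.0 if alpha is None else alpha

assert resolve_alpha() == 0.0
assert resolve_alpha(0.5) == 0.5
```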
40 changes: 20 additions & 20 deletions tests/glm/golden_master/simulation_gm.json
@@ -1,6 +1,6 @@
{
"normal": {
- "default_weights_offset": {
+ "regularization_weights_offset": {
"coef_": [
0.5027665204024282,
0.23449539956055546,
@@ -36,7 +36,7 @@
"intercept_": 3.026490229054092,
"n_iter_": 1
},
- "default_weights": {
+ "regularization_weights": {
"coef_": [
0.5012056522046088,
0.23528722263235485,
@@ -72,7 +72,7 @@
"intercept_": 2.0279948791150764,
"n_iter_": 1
},
- "default_offset": {
+ "regularization_offset": {
"coef_": [
0.49784759015593427,
0.23166926058137094,
@@ -108,7 +108,7 @@
"intercept_": 2.981778440705444,
"n_iter_": 1
},
- "default": {
+ "regularization": {
"coef_": [
0.4985676422254175,
0.22818569911229844,
@@ -1478,7 +1478,7 @@
}
},
"poisson": {
- "default_weights_offset": {
+ "regularization_weights_offset": {
"coef_": [
0.9604408672344522,
0.4432562524921413,
@@ -1514,7 +1514,7 @@
"intercept_": 1.8189178943867188,
"n_iter_": 6
},
- "default_weights": {
+ "regularization_weights": {
"coef_": [
0.9817372866211753,
0.49117907395980553,
@@ -1550,7 +1550,7 @@
"intercept_": 1.157828764208921,
"n_iter_": 6
},
- "default_offset": {
+ "regularization_offset": {
"coef_": [
0.9693196874148616,
0.46707910961062293,
@@ -1586,7 +1586,7 @@
"intercept_": 1.8396971485658087,
"n_iter_": 6
},
- "default": {
+ "regularization": {
"coef_": [
0.9821298947770232,
0.4937841900606277,
@@ -2812,7 +2812,7 @@
}
},
"gamma": {
- "default_weights_offset": {
+ "regularization_weights_offset": {
"coef_": [
0.4866808417045077,
0.1370793228217412,
@@ -2848,7 +2848,7 @@
"intercept_": 5.268950639816242,
"n_iter_": 4
},
- "default_weights": {
+ "regularization_weights": {
"coef_": [
0.48972345202083134,
0.24707128799109493,
@@ -2884,7 +2884,7 @@
"intercept_": 2.512993119536852,
"n_iter_": 4
},
- "default_offset": {
+ "regularization_offset": {
"coef_": [
0.5107634971640694,
0.1783139942111257,
@@ -2920,7 +2920,7 @@
"intercept_": 5.272870219406924,
"n_iter_": 4
},
- "default": {
+ "regularization": {
"coef_": [
0.4966531683982075,
0.24896254652599858,
@@ -4146,7 +4146,7 @@
}
},
"tweedie_p=1.5": {
- "default_weights_offset": {
+ "regularization_weights_offset": {
"coef_": [
0.8740584736837378,
0.39026903329437757,
@@ -4182,7 +4182,7 @@
"intercept_": 2.8380327257627473,
"n_iter_": 4
},
- "default_weights": {
+ "regularization_weights": {
"coef_": [
0.8592854961617753,
0.42694459825027725,
@@ -4218,7 +4218,7 @@
"intercept_": 1.6496674803774887,
"n_iter_": 4
},
- "default_offset": {
+ "regularization_offset": {
"coef_": [
0.8763610403720393,
0.4023951463085115,
@@ -4254,7 +4254,7 @@
"intercept_": 2.7855262434295343,
"n_iter_": 4
},
- "default": {
+ "regularization": {
"coef_": [
0.860178238544325,
0.43000049156945763,
@@ -5480,7 +5480,7 @@
}
},
"binomial": {
- "default_weights_offset": {
+ "regularization_weights_offset": {
"coef_": [
0.0645115293284631,
0.03563706184469416,
@@ -5516,7 +5516,7 @@
"intercept_": 3.3761974509366994,
"n_iter_": 3
},
- "default_weights": {
+ "regularization_weights": {
"coef_": [
0.06396142685405831,
0.03544619397195947,
@@ -5552,7 +5552,7 @@
"intercept_": 2.007458821879875,
"n_iter_": 2
},
- "default_offset": {
+ "regularization_offset": {
"coef_": [
0.059850128940604715,
0.029620907232596274,
@@ -5588,7 +5588,7 @@
"intercept_": 3.4202998674202676,
"n_iter_": 3
},
- "default": {
+ "regularization": {
"coef_": [
0.05979957149348005,
0.03233408720147587,
6 changes: 0 additions & 6 deletions tests/glm/test_distribution.py
@@ -296,7 +296,6 @@ def test_poisson_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -7.390977 (df=1)

regressor = GeneralizedLinearRegressor(
- alpha=0,
family="poisson",
fit_intercept=False,
gradient_tol=1e-8,
@@ -345,7 +344,6 @@ def test_gamma_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -7.057068 (df=2)

regressor = GeneralizedLinearRegressor(
- alpha=0,
family="gamma",
fit_intercept=False,
gradient_tol=1e-8,
@@ -393,7 +391,6 @@ def test_gaussian_deviance_dispersion_loglihood(family, weighted):
# logLik(glm_model) # -7.863404 (df=2)

regressor = GeneralizedLinearRegressor(
- alpha=0,
family=family,
fit_intercept=False,
gradient_tol=1e-8,
@@ -441,7 +438,6 @@ def test_tweedie_deviance_dispersion_loglihood(weighted):
# logLiktweedie(glm_model) # -8.35485

regressor = GeneralizedLinearRegressor(
- alpha=0,
family=TweedieDistribution(1.5),
fit_intercept=False,
gradient_tol=1e-8,
@@ -490,7 +486,6 @@ def test_binomial_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -3.365058 (df=1)

regressor = GeneralizedLinearRegressor(
- alpha=0,
family="binomial",
fit_intercept=False,
gradient_tol=1e-8,
@@ -535,7 +530,6 @@ def test_negative_binomial_deviance_dispersion_loglihood(weighted):
# logLik(glm_model) # -4.187887 (df=1)

regressor = GeneralizedLinearRegressor(
- alpha=0,
family="negative.binomial",
fit_intercept=False,
gradient_tol=1e-8,
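The test deletions above all follow the same pattern: with ``alpha=None`` now resolving to ``0.0`` at fit time, the explicit argument is redundant. A sketch of the equivalence, with constructor arguments copied from the tests above:

```python
from glum import GeneralizedLinearRegressor

# Under the new default, this fits identically to the old construction
# that passed ``alpha=0`` explicitly, which is why the tests drop it.
regressor = GeneralizedLinearRegressor(
    family="poisson",
    fit_intercept=False,
    gradient_tol=1e-8,
)
```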