Skip to content

Commit

Permalink
Accept None more generally (#780)
Browse files Browse the repository at this point in the history
  • Loading branch information
lbittarello authored Mar 18, 2024
1 parent 02e9e08 commit a80e043
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 77 deletions.
115 changes: 56 additions & 59 deletions src/glum/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,13 @@ def get_link(link: Union[str, Link], family: ExponentialDispersionModel) -> Link


def setup_p1(
P1: Union[str, np.ndarray],
P1: Optional[Union[str, np.ndarray]],
X: Union[tm.MatrixBase, tm.StandardizedMatrix],
_dtype,
dtype,
alpha: float,
l1_ratio: float,
) -> np.ndarray:

if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
raise TypeError

Expand All @@ -514,11 +515,13 @@ def setup_p1(
if isinstance(P1, str):
if P1 != "identity":
raise ValueError(f"P1 must be either 'identity' or an array; got {P1}.")
P1 = np.ones(n_features, dtype=_dtype)
P1 = np.ones(n_features, dtype=dtype)
elif P1 is None:
P1 = np.ones(n_features, dtype=dtype)
else:
P1 = np.atleast_1d(P1)
try:
P1 = P1.astype(_dtype, casting="safe", copy=False)
P1 = P1.astype(dtype, casting="safe", copy=False)
except TypeError as e:
raise TypeError(
"The given P1 cannot be converted to a numeric array; "
Expand All @@ -533,37 +536,41 @@ def setup_p1(

# P1 and P2 are now for sure copies
P1 = alpha * l1_ratio * P1
return cast(np.ndarray, P1).astype(_dtype)
return cast(np.ndarray, P1).astype(dtype)


def setup_p2(
P2: Union[str, np.ndarray, sparse.spmatrix],
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]],
X: Union[tm.MatrixBase, tm.StandardizedMatrix],
_stype,
_dtype,
stype,
dtype,
alpha: float,
l1_ratio: float,
) -> Union[np.ndarray, sparse.spmatrix]:

if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
raise TypeError

n_features = X.shape[1]

def _setup_sparse_p2(P2):
return (sparse.dia_matrix((P2, 0), shape=(n_features, n_features))).tocsc()

if isinstance(P2, str):
if P2 != "identity":
raise ValueError(f"P2 must be either 'identity' or an array. Got {P2}.")
if sparse.issparse(X): # if X is sparse, make P2 sparse, too
P2 = (
sparse.dia_matrix(
(np.ones(n_features, dtype=_dtype), 0),
shape=(n_features, n_features),
)
).tocsc()
P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
else:
P2 = np.ones(n_features, dtype=dtype)
elif P2 is None:
if sparse.issparse(X): # if X is sparse, make P2 sparse, too
P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
else:
P2 = np.ones(n_features, dtype=_dtype)
P2 = np.ones(n_features, dtype=dtype)
else:
P2 = check_array(
P2, copy=True, accept_sparse=_stype, dtype=_dtype, ensure_2d=False
P2, copy=True, accept_sparse=stype, dtype=dtype, ensure_2d=False
)
P2 = cast(np.ndarray, P2)
if P2.ndim == 1:
Expand All @@ -575,9 +582,7 @@ def setup_p2(
f"got (P2.shape={P2.shape})."
)
if sparse.issparse(X):
P2 = (
sparse.dia_matrix((P2, 0), shape=(n_features, n_features))
).tocsc()
P2 = _setup_sparse_p2(P2)
elif P2.ndim == 2 and P2.shape[0] == P2.shape[1] and P2.shape[0] == n_features:
if sparse.issparse(X):
P2 = sparse.csc_matrix(P2)
Expand All @@ -604,7 +609,7 @@ def setup_p2(


def initialize_start_params(
start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, _dtype
start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, dtype
) -> Optional[np.ndarray]:
if start_params is None:
return None
Expand All @@ -614,7 +619,7 @@ def initialize_start_params(
accept_sparse=False,
force_all_finite=True,
ensure_2d=False,
dtype=_dtype,
dtype=dtype,
copy=True,
)

Expand Down Expand Up @@ -696,12 +701,12 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin):
def __init__(
self,
l1_ratio: float = 0,
P1="identity",
P2: Union[str, np.ndarray, sparse.spmatrix] = "identity",
P1: Optional[Union[str, np.ndarray]] = "identity",
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
fit_intercept=True,
family: Union[str, ExponentialDispersionModel] = "normal",
link: Union[str, Link] = "auto",
solver="auto",
solver: str = "auto",
max_iter=100,
gradient_tol: Optional[float] = None,
step_size_tol: Optional[float] = None,
Expand Down Expand Up @@ -2186,14 +2191,14 @@ def _validate_hyperparameters(self) -> None:
"scale_predictors=True is not supported when fit_intercept=False."
)
if ((self.lower_bounds is not None) or (self.upper_bounds is not None)) and (
self.solver not in ["irls-cd", "auto"]
self.solver not in ["auto", "irls-cd"]
):
raise ValueError(
"Only the 'cd' solver is supported when bounds are set; "
f"got {self.solver}."
)
if ((self.A_ineq is not None) or (self.b_ineq is not None)) and (
self.solver not in ["trust-constr", "auto"]
self.solver not in [None, "auto", "trust-constr"]
):
raise ValueError(
"Only the 'trust-constr' solver supports inequality constraints; "
Expand Down Expand Up @@ -2249,11 +2254,8 @@ def _set_up_and_check_fit_args(
Union[str, np.ndarray],
Union[str, np.ndarray],
]:
_dtype = [np.float64, np.float32]
if solver == "irls-cd":
_stype = ["csc"]
else:
_stype = ["csc", "csr"]
dtype = [np.float64, np.float32]
stype = ["csc"] if solver == "irls-cd" else ["csc", "csr"]

P1 = self.P1
P2 = self.P2
Expand Down Expand Up @@ -2357,8 +2359,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
X, y = check_X_y_tabmat_compliant(
X,
y,
accept_sparse=_stype,
dtype=_dtype,
accept_sparse=stype,
dtype=dtype,
copy=copy_X,
force_all_finite=force_all_finite,
drop_first=getattr(self, "drop_first", False),
Expand All @@ -2369,8 +2371,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
X,
y,
ensure_2d=True,
accept_sparse=_stype,
dtype=_dtype,
accept_sparse=stype,
dtype=dtype,
copy=copy_X,
force_all_finite=force_all_finite,
)
Expand Down Expand Up @@ -2459,7 +2461,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a
combination of L1 and L2.
P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
This array controls the strength of the regularization for each coefficient
independently. A high value will lead to higher regularization while a value of
zero will remove the regularization on this parameter.
Expand All @@ -2468,20 +2470,20 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
the penalty of the categorical column will be applied to all the levels of
the categorical.
P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
or (n_features, n_features), optional (default='identity')
With this option, you can set the P2 matrix in the L2 penalty
``w*P2*w``. This gives a fine control over this penalty (Tikhonov
regularization). A 2d array is directly used as the square matrix P2. A
1d array is interpreted as diagonal (square) matrix. The default
``'identity'`` sets the identity matrix, which gives the usual squared
L2-norm. If you just want to exclude certain coefficients, pass a 1d
array filled with 1 and 0 for the coefficients to be excluded. Note that
P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
with a categorical dtype and P2 has the same size as the number of columns,
the penalty of the categorical column will be applied to all the levels of
the categorical. Note that if P2 is two-dimensional, its size needs to be
of the same length as the expanded ``X`` matrix.
``'identity'`` and ``None`` set the identity matrix, which gives the usual
squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
dtype and P2 has the same size as the number of columns, the penalty of the
categorical column will be applied to all the levels of the categorical. Note
that if P2 is two-dimensional, its size needs to be of the same length as the
expanded ``X`` matrix.
fit_intercept : bool, optional (default=True)
Specifies if a constant (a.k.a. bias or intercept) should be
Expand All @@ -2496,7 +2498,8 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
for ``'negative.binomial'`` and theta parameter.
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} oe Link, \
optional (default='auto')
The link function of the GLM, i.e. mapping from linear
predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
the link depending on the chosen family as follows:
Expand All @@ -2510,8 +2513,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
optional (default='auto')
Algorithm to use in the optimization problem:
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
otherwise.
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
- ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
descent inner solver. This can deal with L1 as well as L2 penalties.
Note that in order to avoid unnecessary memory duplication of X in the
Expand Down Expand Up @@ -2740,12 +2742,12 @@ def __init__(
self,
alpha=None,
l1_ratio=0,
P1="identity",
P2="identity",
P1: Optional[Union[str, np.ndarray]] = "identity",
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
fit_intercept=True,
family: Union[str, ExponentialDispersionModel] = "normal",
link: Union[str, Link] = "auto",
solver="auto",
solver: str = "auto",
max_iter=100,
gradient_tol: Optional[float] = None,
step_size_tol: Optional[float] = None,
Expand Down Expand Up @@ -2935,16 +2937,11 @@ def fit(

self._set_up_for_fit(y)

_dtype = [np.float64, np.float32]
if self._solver == "irls-cd":
_stype = ["csc"]
else:
_stype = ["csc", "csr"]

# 1.3 arguments to take special care ##################################
# P1, P2, start_params
stype = ["csc"] if self._solver == "irls-cd" else ["csc", "csr"]
P1_no_alpha = setup_p1(P1, X, X.dtype, 1, self.l1_ratio)
P2_no_alpha = setup_p2(P2, X, _stype, X.dtype, 1, self.l1_ratio)
P2_no_alpha = setup_p2(P2, X, stype, X.dtype, 1, self.l1_ratio)

lower_bounds = check_bounds(self.lower_bounds, X.shape[1], X.dtype)
upper_bounds = check_bounds(self.upper_bounds, X.shape[1], X.dtype)
Expand All @@ -2961,7 +2958,7 @@ def fit(
self.start_params,
n_cols=X.shape[1],
fit_intercept=self.fit_intercept,
_dtype=_dtype,
dtype=[np.float64, np.float32],
)

# 1.4 additional validations ##########################################
Expand Down
36 changes: 18 additions & 18 deletions src/glum/_glm_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
If you pass ``l1_ratio`` as an array, the ``fit`` method will choose the
best value of ``l1_ratio`` and store it as ``self.l1_ratio``.
P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
This array controls the strength of the regularization for each coefficient
independently. A high value will lead to higher regularization while a value of
zero will remove the regularization on this parameter.
Expand All @@ -46,20 +46,20 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
the penalty of the categorical column will be applied to all the levels of
the categorical.
P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
or (n_features, n_features), optional (default='identity')
With this option, you can set the P2 matrix in the L2 penalty
``w*P2*w``. This gives a fine control over this penalty (Tikhonov
regularization). A 2d array is directly used as the square matrix P2. A
1d array is interpreted as diagonal (square) matrix. The default
``'identity'`` sets the identity matrix, which gives the usual squared
L2-norm. If you just want to exclude certain coefficients, pass a 1d
array filled with 1 and 0 for the coefficients to be excluded. Note that
P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
with a categorical dtype and P2 has the same size as the number of columns,
the penalty of the categorical column will be applied to all the levels of
the categorical. Note that if P2 is two-dimensional, its size needs to be
of the same length as the expanded ``X`` matrix.
``'identity'`` and ``None`` set the identity matrix, which gives the usual
squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
dtype and P2 has the same size as the number of columns, the penalty of the
categorical column will be applied to all the levels of the categorical. Note
that if P2 is two-dimensional, its size needs to be of the same length as the
expanded ``X`` matrix.
fit_intercept : bool, optional (default=True)
Specifies if a constant (a.k.a. bias or intercept) should be
Expand All @@ -74,7 +74,8 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
for ``'negative.binomial'`` and theta parameter.
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
link : {'auto', 'identity', 'log', 'logit', 'cloglog'}, Link or None, \
optional (default='auto')
The link function of the GLM, i.e. mapping from linear
predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
the link depending on the chosen family as follows:
Expand All @@ -84,11 +85,11 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
``'inverse.gaussian'`` and ``'negative.binomial'``.
- ``'logit'`` for family ``'binomial'``
solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs'}, optional (default='auto')
solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs', 'trust-constr'}, \
optional (default='auto')
Algorithm to use in the optimization problem:
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
otherwise.
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
- ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
descent inner solver. This can deal with L1 as well as L2 penalties.
Note that in order to avoid unnecessary memory duplication of X in the
Expand Down Expand Up @@ -291,7 +292,7 @@ def __init__(
fit_intercept=True,
family: Union[str, ExponentialDispersionModel] = "normal",
link: Union[str, Link] = "auto",
solver="auto",
solver: str = "auto",
max_iter=100,
gradient_tol: Optional[float] = None,
step_size_tol: Optional[float] = None,
Expand Down Expand Up @@ -525,12 +526,11 @@ def _get_deviance(coef):
):
assert isinstance(self._link_instance, LogLink)

_dtype = [np.float64, np.float32]
start_params = initialize_start_params(
self.start_params,
n_cols=X.shape[1],
fit_intercept=self.fit_intercept,
_dtype=_dtype,
dtype=[np.float64, np.float32],
)

P1_no_alpha = setup_p1(P1, X, X.dtype, 1, l1)
Expand Down Expand Up @@ -690,7 +690,7 @@ def _get_deviance(coef):
self.start_params,
n_cols=X.shape[1],
fit_intercept=self.fit_intercept,
_dtype=X.dtype,
dtype=X.dtype,
)

coef = self._get_start_coef(
Expand Down

0 comments on commit a80e043

Please sign in to comment.