
Commit a4362ba

ENH Add SLOPE penalty (#92)
1 parent 92c2a45 commit a4362ba

File tree: 7 files changed, +167 -9 lines

7 files changed

+167
-9
lines changed

.github/workflows/main.yml (+1)

@@ -25,5 +25,6 @@ jobs:
         pip install numpydoc
         pip install .
         pip install statsmodels cvxopt
+        pip install git+https://github.com/jolars/pyslope.git
     - name: Test with pytest
       run: pytest -v skglm/

doc/api.rst (+1)

@@ -43,6 +43,7 @@ Penalties
    WeightedGroupL2
    SCAD
    BlockSCAD
+   SLOPE


 Datafits

skglm/penalties/__init__.py (+3 -1)

@@ -6,9 +6,11 @@
     L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2
 )

+from .non_separable import SLOPE
+

 __all__ = [
     BasePenalty,
     L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox,
-    L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2
+    L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, SLOPE
 ]

skglm/penalties/non_separable.py (+60)

@@ -0,0 +1,60 @@
+import numpy as np
+from numba import float64
+
+from skglm.penalties.base import BasePenalty
+from skglm.utils import prox_SLOPE
+
+
+class SLOPE(BasePenalty):
+    """Sorted L-One Penalized Estimation (SLOPE) penalty.
+
+    Attributes
+    ----------
+    alphas : array, shape (n_features,)
+        Contains the regularization level for every feature.
+        When ``alphas`` contains a single unique value, ``SLOPE``
+        is equivalent to the ``L1`` penalty.
+
+    References
+    ----------
+    .. [1] M. Bogdan, E. van den Berg, C. Sabatti, W. Su, E. Candes
+        "SLOPE - Adaptive Variable Selection via Convex Optimization",
+        The Annals of Applied Statistics 9 (3): 1103-40
+        https://doi.org/10.1214/15-AOAS842
+    """
+
+    def __init__(self, alphas):
+        self.alphas = alphas
+
+    def get_spec(self):
+        spec = (
+            ('alphas', float64[:]),
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(alphas=self.alphas)
+
+    def value(self, w):
+        """Compute the value of SLOPE at w."""
+        # pair the largest |w| with the largest alpha
+        return np.sum(np.sort(np.abs(w)) * self.alphas[::-1])
+
+    def prox_vec(self, x, stepsize):
+        def _prox(_x, _alphas):
+            sign_x = np.sign(_x)
+            _x = np.abs(_x)
+            # prox_SLOPE expects its input sorted in non-increasing order
+            sorted_indices = np.argsort(_x)[::-1]
+            prox = np.zeros_like(_x)
+            prox[sorted_indices] = prox_SLOPE(_x[sorted_indices], _alphas)
+            return prox * sign_x
+        return _prox(x, self.alphas * stepsize)
+
+    def prox_1d(self, value, stepsize, j):
+        raise ValueError(
+            "No coordinate-wise proximal operator for SLOPE. Use `prox_vec` instead."
+        )
+
+    def subdiff_distance(self, w, grad, ws):
+        raise ValueError(
+            "No subdifferential distance for SLOPE. Use `opt_strategy='fixpoint'`"
+        )
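
As the docstring notes, a constant ``alphas`` vector makes SLOPE coincide with the L1 penalty, so ``prox_vec`` must reduce to plain soft-thresholding. A minimal sanity check of that claim, assuming this commit is installed (the random data is illustrative only):

    import numpy as np
    from skglm.penalties import SLOPE

    rng = np.random.default_rng(0)
    x = rng.standard_normal(10)
    alpha, stepsize = 0.5, 0.3

    # with constant alphas, the SLOPE prox is elementwise soft-thresholding
    penalty = SLOPE(np.full(10, alpha))
    expected = np.sign(x) * np.maximum(np.abs(x) - alpha * stepsize, 0.)
    np.testing.assert_allclose(penalty.prox_vec(x, stepsize), expected)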

skglm/solvers/fista.py (+17 -3)

@@ -27,10 +27,13 @@ class FISTA(BaseSolver):
     https://epubs.siam.org/doi/10.1137/080716542
     """

-    def __init__(self, max_iter=100, tol=1e-4, verbose=0):
+    def __init__(self, max_iter=100, tol=1e-4, opt_strategy="subdiff", verbose=0):
         self.max_iter = max_iter
         self.tol = tol
         self.verbose = verbose
+        self.opt_strategy = opt_strategy
+        self.fit_intercept = False  # needed by GeneralizedLinearEstimator
+        self.warm_start = False

     def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         p_objs_out = []
@@ -60,11 +63,22 @@ def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):

             step = 1 / lipschitz
             z -= step * grad
-            w = _prox_vec(w, z, penalty, step)
+            if hasattr(penalty, "prox_vec"):
+                w = penalty.prox_vec(z, step)
+            else:
+                w = _prox_vec(w, z, penalty, step)
             Xw = X @ w
             z = w + (t_old - 1.) / t_new * (w - w_old)

-            opt = penalty.subdiff_distance(w, grad, all_features)
+            if self.opt_strategy == "subdiff":
+                opt = penalty.subdiff_distance(w, grad, all_features)
+            elif self.opt_strategy == "fixpoint":
+                opt = np.abs(w - penalty.prox_vec(w - grad / lipschitz, 1 / lipschitz))
+            else:
+                raise ValueError(
+                    "Unknown optimality strategy. Expected "
+                    f"`subdiff` or `fixpoint`. Got {self.opt_strategy}")

             stop_crit = np.max(opt)

             p_obj = datafit.value(y, w, Xw) + penalty.value(w)
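
Since SLOPE defines no coordinate-wise subdifferential distance, the solver must measure optimality via the proximal fixed-point residual instead: w is a solution iff w = prox(w - grad/L, 1/L), which is exactly what `opt_strategy="fixpoint"` computes above. A hedged usage sketch with this commit's API (the `make_correlated_data` helper is the one imported in the tests below; the alphas values are illustrative):

    import numpy as np
    from skglm import GeneralizedLinearEstimator
    from skglm.penalties import SLOPE
    from skglm.solvers import FISTA
    from skglm.utils import make_correlated_data

    X, y, _ = make_correlated_data(n_samples=50, n_features=20, random_state=0)
    alphas = np.linspace(0.1, 0.01, num=20)  # illustrative non-increasing sequence
    est = GeneralizedLinearEstimator(
        penalty=SLOPE(alphas),
        solver=FISTA(max_iter=1000, tol=1e-8, opt_strategy="fixpoint"),
    ).fit(X, y)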

skglm/tests/test_penalties.py (+38 -5)

@@ -6,10 +6,10 @@

 from skglm.datafits import Quadratic, QuadraticMultiTask
 from skglm.penalties import (
-    L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3,
+    L1, L1_plus_L2, WeightedL1, MCPenalty, SCAD, IndicatorBox, L0_5, L2_3, SLOPE,
     L2_1, L2_05, BlockMCPenalty, BlockSCAD)
-from skglm import GeneralizedLinearEstimator
-from skglm.solvers import AndersonCD, MultiTaskBCD
+from skglm import GeneralizedLinearEstimator, Lasso
+from skglm.solvers import AndersonCD, MultiTaskBCD, FISTA
 from skglm.utils import make_correlated_data


@@ -25,6 +25,8 @@
 alpha_max = norm(X.T @ y, ord=np.inf) / n_samples
 alpha = alpha_max / 1000

+tol = 1e-10
+
 penalties = [
     L1(alpha=alpha),
     L1_plus_L2(alpha=alpha, l1_ratio=0.5),
@@ -44,7 +46,6 @@

 @pytest.mark.parametrize('penalty', penalties)
 def test_subdiff_diff(penalty):
-    tol = 1e-10
     # tol=1e-14 is too low when coefs are of order 1. square roots are computed in
     # some penalties and precision is lost
     est = GeneralizedLinearEstimator(
@@ -58,7 +59,6 @@ def test_subdiff_diff(penalty):

 @pytest.mark.parametrize('block_penalty', block_penalties)
 def test_subdiff_diff_block(block_penalty):
-    tol = 1e-10  # see test_subdiff_dist
     est = GeneralizedLinearEstimator(
         datafit=QuadraticMultiTask(),
         penalty=block_penalty,
@@ -68,5 +68,38 @@ def test_subdiff_diff_block(block_penalty):
     assert_array_less(est.stop_crit_, est.solver.tol)


+def test_slope_lasso():
+    # check that when alphas = [alpha, ..., alpha], SLOPE and L1 solutions are equal
+    alphas = np.full(n_features, alpha)
+    est = GeneralizedLinearEstimator(
+        penalty=SLOPE(alphas),
+        solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
+    ).fit(X, y)
+    lasso = Lasso(alpha, fit_intercept=False, tol=tol).fit(X, y)
+    np.testing.assert_allclose(est.coef_, lasso.coef_, rtol=1e-5)
+
+
+def test_slope():
+    # compare solutions with `pyslope`: https://github.com/jolars/pyslope
+    try:
+        from slope.solvers import pgd_slope  # noqa
+        from slope.utils import lambda_sequence  # noqa
+    except ImportError:
+        pytest.xfail(
+            "This test requires slope to run.\n"
+            "https://github.com/jolars/pyslope")
+
+    q = 0.1
+    alphas = lambda_sequence(
+        X, y, fit_intercept=False, reg=alpha / alpha_max, q=q)
+    ours = GeneralizedLinearEstimator(
+        penalty=SLOPE(alphas),
+        solver=FISTA(max_iter=1000, tol=tol, opt_strategy="fixpoint"),
+    ).fit(X, y)
+    pyslope_out = pgd_slope(
+        X, y, alphas, fit_intercept=False, max_it=1000, gap_tol=tol)
+    np.testing.assert_allclose(ours.coef_, pyslope_out["beta"], rtol=1e-5)
+
+
 if __name__ == "__main__":
     pass

skglm/utils.py (+47)

@@ -559,5 +559,52 @@ def _XT_dot_vec(X_data, X_indptr, X_indices, vec):
     return result


+@njit
+def prox_SLOPE(z, alphas):
+    """Fast computation of the proximal operator of SLOPE.
+
+    Extracted from:
+    https://github.com/agisga/grpSLOPE/blob/master/src/proxSortedL1.c
+
+    Parameters
+    ----------
+    z : array, shape (n_features,)
+        Non-negative coefficient vector sorted in non-increasing order.
+
+    alphas : array, shape (n_features,)
+        Regularization hyperparameters sorted in non-increasing order.
+    """
+    n_features = z.shape[0]
+    x = np.empty(n_features)
+
+    k = 0
+    idx_i = np.empty((n_features,), dtype=np.int64)
+    idx_j = np.empty((n_features,), dtype=np.int64)
+    s = np.empty((n_features,), dtype=np.float64)
+    w = np.empty((n_features,), dtype=np.float64)
+
+    for i in range(n_features):
+        idx_i[k] = i
+        idx_j[k] = i
+        s[k] = z[i] - alphas[i]
+        w[k] = s[k]
+
+        # merge with previous blocks while monotonicity is violated (PAVA)
+        while k > 0 and w[k - 1] <= w[k]:
+            k -= 1
+            idx_j[k] = i
+            s[k] += s[k + 1]
+            w[k] = s[k] / (i - idx_i[k] + 1)
+
+        k += 1
+
+    # clip each block's value at zero and write it back
+    for j in range(k):
+        d = w[j]
+        d = 0 if d < 0 else d
+        for i in range(idx_i[j], idx_j[j] + 1):
+            x[i] = d
+
+    return x
+
+
 if __name__ == '__main__':
     pass
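
The stack-based routine above is the pool-adjacent-violators (PAVA) scheme from the SLOPE paper cited in the penalty's docstring: for non-negative, non-increasingly sorted input, the SLOPE prox equals the non-increasing isotonic regression of z - alphas, clipped at zero. A minimal cross-check under that interpretation, assuming scikit-learn is available:

    import numpy as np
    from sklearn.isotonic import isotonic_regression
    from skglm.utils import prox_SLOPE

    rng = np.random.default_rng(0)
    z = np.sort(rng.uniform(0., 5., size=30))[::-1].copy()       # non-negative, non-increasing
    alphas = np.sort(rng.uniform(0., 2., size=30))[::-1].copy()  # non-increasing

    # reference: non-increasing isotonic regression of z - alphas, clipped at 0
    ref = np.maximum(isotonic_regression(z - alphas, increasing=False), 0.)
    np.testing.assert_allclose(prox_SLOPE(z, alphas), ref)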
