Commit 5efeb8f

ENH - jit-compile datafits and penalties inside solver (#270)
Co-authored-by: mathurinm <[email protected]>
1 parent: 9c37cd7

22 files changed: +150, -138 lines
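
In short: `BaseSolver.solve` now JIT-compiles the datafit and penalty it receives, so callers pass plain instances instead of wrapping them in `compiled_clone`. A minimal sketch of the new calling convention (the data, `alpha`, and solver settings are illustrative, and it assumes `AndersonCD` initializes the datafit internally, as the solvers touched in this commit now do):

import numpy as np
from skglm.datafits import Quadratic
from skglm.penalties import L1
from skglm.solvers import AndersonCD
from skglm.utils.data import make_correlated_data

# toy problem, for illustration only
X, y, _ = make_correlated_data(n_samples=100, n_features=40, random_state=0)
alpha = 0.01 * np.max(np.abs(X.T @ y)) / len(y)

# before this commit: solve(X, y, compiled_clone(Quadratic()), compiled_clone(L1(alpha)))
# after: plain instances, compiled inside solve()
w, p_objs, stop_crit = AndersonCD(fit_intercept=False).solve(X, y, Quadratic(), L1(alpha))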

examples/plot_sparse_recovery.py (+1 -2)

@@ -18,7 +18,6 @@
 from skglm.utils.data import make_correlated_data
 from skglm.solvers import AndersonCD
 from skglm.datafits import Quadratic
-from skglm.utils.jit_compilation import compiled_clone
 from skglm.penalties import L1, MCPenalty, L0_5, L2_3, SCAD

 cmap = plt.get_cmap('tab10')
@@ -74,7 +73,7 @@
 for idx, estimator in enumerate(penalties.keys()):
     print(f'Running {estimator}...')
     estimator_path = solver.path(
-        X, y, compiled_clone(datafit), compiled_clone(penalties[estimator]),
+        X, y, datafit, penalties[estimator],
         alphas=alphas)

     f1_temp = np.zeros(n_alphas)

examples/plot_survival_analysis.py (+5 -6)

@@ -15,6 +15,7 @@
 # Let's first generate synthetic data on which to run the Cox estimator,
 # using ``skglm`` data utils.
 #
+
 from skglm.utils.data import make_dummy_survival_data

 n_samples, n_features = 500, 100
@@ -59,18 +60,16 @@
 # Todo so, we need to combine a Cox datafit and a :math:`\ell_1` penalty
 # and solve the resulting problem using skglm Proximal Newton solver ``ProxNewton``.
 # We set the intensity of the :math:`\ell_1` regularization to ``alpha=1e-2``.
-from skglm.datafits import Cox
 from skglm.penalties import L1
+from skglm.datafits import Cox
 from skglm.solvers import ProxNewton

-from skglm.utils.jit_compilation import compiled_clone
-
 # regularization intensity
 alpha = 1e-2

 # skglm internals: init datafit and penalty
-datafit = compiled_clone(Cox())
-penalty = compiled_clone(L1(alpha))
+datafit = Cox()
+penalty = L1(alpha)

 datafit.initialize(X, y)

@@ -230,7 +229,7 @@
 # We only need to pass in ``use_efron=True`` to the ``Cox`` datafit.

 # ensure using Efron estimate
-datafit = compiled_clone(Cox(use_efron=True))
+datafit = Cox(use_efron=True)
 datafit.initialize(X, y)

 # solve the problem

skglm/estimators.py (+19 -28)

@@ -18,7 +18,6 @@
 from sklearn.utils._param_validation import Interval, StrOptions
 from sklearn.multiclass import OneVsRestClassifier, check_classification_targets

-from skglm.utils.jit_compilation import compiled_clone
 from skglm.solvers import AndersonCD, MultiTaskBCD, GroupBCD
 from skglm.datafits import (Cox, Quadratic, Logistic, QuadraticSVC,
                             QuadraticMultiTask, QuadraticGroup,)
@@ -102,12 +101,10 @@ def _glm_fit(X, y, model, datafit, penalty, solver):

     n_samples, n_features = X_.shape

-    penalty_jit = compiled_clone(penalty)
-    datafit_jit = compiled_clone(datafit, to_float32=X.dtype == np.float32)
     if issparse(X):
-        datafit_jit.initialize_sparse(X_.data, X_.indptr, X_.indices, y)
+        datafit.initialize_sparse(X_.data, X_.indptr, X_.indices, y)
     else:
-        datafit_jit.initialize(X_, y)
+        datafit.initialize(X_, y)

     # if model.warm_start and hasattr(model, 'coef_') and model.coef_ is not None:
     if solver.warm_start and hasattr(model, 'coef_') and model.coef_ is not None:
@@ -136,7 +133,7 @@ def _glm_fit(X, y, model, datafit, penalty, solver):
                 "The size of the WeightedL1 penalty weights should be n_features, "
                 "expected %i, got %i." % (X_.shape[1], len(penalty.weights)))

-    coefs, p_obj, kkt = solver.solve(X_, y, datafit_jit, penalty_jit, w, Xw)
+    coefs, p_obj, kkt = solver.solve(X_, y, datafit, penalty, w, Xw)
     model.coef_, model.stop_crit_ = coefs[:n_features], kkt
     if y.ndim == 1:
         model.intercept_ = coefs[-1] if fit_intercept else 0.
@@ -440,8 +437,8 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
             The number of iterations along the path. If return_n_iter is set to
             ``True``.
         """
-        penalty = compiled_clone(L1(self.alpha, self.positive))
-        datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
+        penalty = L1(self.alpha, self.positive)
+        datafit = Quadratic()
         solver = AndersonCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
@@ -581,8 +578,8 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
             raise ValueError("The number of weights must match the number of \
                 features. Got %s, expected %s." % (
                 len(weights), X.shape[1]))
-        penalty = compiled_clone(WeightedL1(self.alpha, weights, self.positive))
-        datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
+        penalty = WeightedL1(self.alpha, weights, self.positive)
+        datafit = Quadratic()
         solver = AndersonCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
@@ -744,8 +741,8 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
             The number of iterations along the path. If return_n_iter is set to
             ``True``.
         """
-        penalty = compiled_clone(L1_plus_L2(self.alpha, self.l1_ratio, self.positive))
-        datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
+        penalty = L1_plus_L2(self.alpha, self.l1_ratio, self.positive)
+        datafit = Quadratic()
         solver = AndersonCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
@@ -917,19 +914,17 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
             ``True``.
         """
         if self.weights is None:
-            penalty = compiled_clone(
-                MCPenalty(self.alpha, self.gamma, self.positive)
-            )
+            penalty = MCPenalty(self.alpha, self.gamma, self.positive)
         else:
             if X.shape[1] != len(self.weights):
                 raise ValueError(
                     "The number of weights must match the number of features. "
                     f"Got {len(self.weights)}, expected {X.shape[1]}."
                 )
-            penalty = compiled_clone(
-                WeightedMCPenalty(self.alpha, self.gamma, self.weights, self.positive)
-            )
-        datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
+            penalty = WeightedMCPenalty(
+                self.alpha, self.gamma, self.weights, self.positive)
+
+        datafit = Quadratic()
         solver = AndersonCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
@@ -1369,10 +1364,6 @@ def fit(self, X, y):
         else:
             penalty = L2(self.alpha)

-        # skglm internal: JIT compile classes
-        datafit = compiled_clone(datafit)
-        penalty = compiled_clone(penalty)
-
         # init solver
         if self.l1_ratio == 0.:
             solver = LBFGS(max_iter=self.max_iter, tol=self.tol, verbose=self.verbose)
@@ -1518,14 +1509,14 @@ def fit(self, X, Y):
         if not self.warm_start or not hasattr(self, "coef_"):
             self.coef_ = None

-        datafit_jit = compiled_clone(QuadraticMultiTask(), X.dtype == np.float32)
-        penalty_jit = compiled_clone(L2_1(self.alpha), X.dtype == np.float32)
+        datafit = QuadraticMultiTask()
+        penalty = L2_1(self.alpha)

         solver = MultiTaskBCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
             warm_start=self.warm_start, verbose=self.verbose)
-        W, obj_out, kkt = solver.solve(X, Y, datafit_jit, penalty_jit)
+        W, obj_out, kkt = solver.solve(X, Y, datafit, penalty)

         self.coef_ = W[:X.shape[1], :].T
         self.intercept_ = self.fit_intercept * W[-1, :]
@@ -1573,8 +1564,8 @@ def path(self, X, Y, alphas, coef_init=None, return_n_iter=False, **params):
             The number of iterations along the path. If return_n_iter is set to
             ``True``.
         """
-        datafit = compiled_clone(QuadraticMultiTask(), to_float32=X.dtype == np.float32)
-        penalty = compiled_clone(L2_1(self.alpha))
+        datafit = QuadraticMultiTask()
+        penalty = L2_1(self.alpha)
         solver = MultiTaskBCD(
             self.max_iter, self.max_epochs, self.p0, tol=self.tol,
             ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
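
Nothing changes at the estimator level: `_glm_fit` now hands the plain datafit and penalty straight to `solver.solve`, which compiles them. A hedged sketch of the unchanged public API (random data and hyperparameters are illustrative):

import numpy as np
from skglm import GeneralizedLinearEstimator
from skglm.datafits import Quadratic
from skglm.penalties import MCPenalty
from skglm.solvers import AndersonCD

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 30))
y = X @ rng.standard_normal(30)

# plain, uncompiled instances are now the expected inputs
est = GeneralizedLinearEstimator(
    datafit=Quadratic(),
    penalty=MCPenalty(alpha=0.1, gamma=3.0),
    solver=AndersonCD(),
).fit(X, y)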

skglm/experimental/reweighted.py (+2 -2)

@@ -69,9 +69,9 @@ def fit(self, X, y):
                 f"penalty {self.penalty.__class__.__name__}")

         n_features = X.shape[1]
-        _penalty = compiled_clone(WeightedL1(self.penalty.alpha, np.ones(n_features)))
-        self.datafit = compiled_clone(self.datafit)
+        # we need to compile this as it is not passed to solver.solve:
         self.penalty = compiled_clone(self.penalty)
+        _penalty = WeightedL1(self.penalty.alpha, np.ones(n_features))

         self.loss_history_ = []
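
Note the one exception to the refactor: `self.penalty` is still compiled by hand here because, as the new comment says, it is never passed to `solver.solve`; the reweighting loop uses it directly (presumably to update weights and track the loss history), so it would otherwise never go through the solver's internal compilation.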

skglm/experimental/sqrt_lasso.py (+2 -3)

@@ -6,7 +6,6 @@

 from skglm.penalties import L1
 from skglm.utils.prox_funcs import ST_vec, proj_L2ball, BST
-from skglm.utils.jit_compilation import compiled_clone
 from skglm.datafits.base import BaseDatafit
 from skglm.solvers.prox_newton import ProxNewton

@@ -179,8 +178,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         alphas = np.sort(alphas)[::-1]

         n_features = X.shape[1]
-        sqrt_quadratic = compiled_clone(SqrtQuadratic())
-        l1_penalty = compiled_clone(L1(1.))  # alpha is set along the path
+        sqrt_quadratic = SqrtQuadratic()
+        l1_penalty = L1(1.)  # alpha is set along the path

         coefs = np.zeros((n_alphas, n_features))

skglm/experimental/tests/test_quantile_regression.py (+2 -3)

@@ -6,7 +6,6 @@
 from skglm import GeneralizedLinearEstimator
 from skglm.experimental.pdcd_ws import PDCD_WS
 from skglm.experimental.quantile_regression import Pinball
-from skglm.utils.jit_compilation import compiled_clone

 from skglm.utils.data import make_correlated_data
 from sklearn.linear_model import QuantileRegressor
@@ -23,8 +22,8 @@ def test_PDCD_WS(quantile_level):
     alpha_max = norm(X.T @ (np.sign(y)/2 + (quantile_level - 0.5)), ord=np.inf)
     alpha = alpha_max / 5

-    datafit = compiled_clone(Pinball(quantile_level))
-    penalty = compiled_clone(L1(alpha))
+    datafit = Pinball(quantile_level)
+    penalty = L1(alpha)

     w = PDCD_WS(
         dual_init=np.sign(y)/2 + (quantile_level - 0.5)

skglm/experimental/tests/test_sqrt_lasso.py (+2 -3)

@@ -7,7 +7,6 @@
 from skglm.experimental.sqrt_lasso import (SqrtLasso, SqrtQuadratic,
                                            _chambolle_pock_sqrt)
 from skglm.experimental.pdcd_ws import PDCD_WS
-from skglm.utils.jit_compilation import compiled_clone


 def test_alpha_max():
@@ -70,8 +69,8 @@ def test_PDCD_WS(with_dual_init):

     dual_init = y / norm(y) if with_dual_init else None

-    datafit = compiled_clone(SqrtQuadratic())
-    penalty = compiled_clone(L1(alpha))
+    datafit = SqrtQuadratic()
+    penalty = L1(alpha)

     w = PDCD_WS(dual_init=dual_init).solve(X, y, datafit, penalty)[0]
     clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y)

skglm/solvers/base.py (+31 -2)

@@ -1,5 +1,10 @@
+import warnings
 from abc import abstractmethod, ABC
+
+import numpy as np
+
 from skglm.utils.validation import check_attrs
+from skglm.utils.jit_compilation import compiled_clone


 class BaseSolver(ABC):
@@ -89,8 +94,9 @@ def custom_checks(self, X, y, datafit, penalty):
         """
         pass

-    def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None,
-              *, run_checks=True):
+    def solve(
+        self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True
+    ):
         """Solve the optimization problem after validating its compatibility.

         A proxy of ``_solve`` method that implicitly ensures the compatibility
@@ -101,6 +107,29 @@ def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None,
         >>> ...
         >>> coefs, obj_out, stop_crit = solver.solve(X, y, datafit, penalty)
         """
+        # TODO check for datafit/penalty being jit-compiled properly
+        # instead of searching for a string
+        if "jitclass" in str(type(datafit)):
+            warnings.warn(
+                "Passing in a compiled datafit is deprecated since skglm v0.5 "
+                "Compilation is now done inside solver."
+                "This will raise an error starting skglm v0.6 onwards."
+            )
+        elif datafit is not None:
+            datafit = compiled_clone(datafit, to_float32=X.dtype == np.float32)
+
+        if "jitclass" in str(type(penalty)):
+            warnings.warn(
+                "Passing in a compiled penalty is deprecated since skglm v0.5 "
+                "Compilation is now done inside solver. "
+                "This will raise an error starting skglm v0.6 onwards."
+            )
+        elif penalty is not None:
+            penalty = compiled_clone(penalty)
+        # TODO add support for bool spec in compiled_clone
+        # currently, doing so break the code
+        # penalty = compiled_clone(penalty, to_float32=X.dtype == np.float32)
+
         if run_checks:
             self._validate(X, y, datafit, penalty)
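
The deprecation check is deliberately crude: a Numba jitclass instance has "jitclass" in its type string, while a plain Python instance does not and simply gets compiled (the TODOs above flag both the string matching and the missing bool-spec support as known limitations). A quick sketch of the two branches (illustrative, not part of the commit):

from skglm.datafits import Quadratic
from skglm.utils.jit_compilation import compiled_clone

plain = Quadratic()
compiled = compiled_clone(plain)

print("jitclass" in str(type(plain)))     # False: solve() compiles it silently
print("jitclass" in str(type(compiled)))  # True: solve() emits the deprecation warning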

skglm/solvers/common.py (+1 -2)

@@ -46,8 +46,7 @@


 @njit
-def dist_fix_point_bcd(
-        w, grad_ws, lipschitz_ws, datafit, penalty, ws):
+def dist_fix_point_bcd(w, grad_ws, lipschitz_ws, datafit, penalty, ws):
     """Compute the violation of the fixed point iterate scheme for BCD.

     Parameters

skglm/solvers/fista.py (+2)

@@ -51,10 +51,12 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples)

         if X_is_sparse:
+            datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
             lipschitz = datafit.get_global_lipschitz_sparse(
                 X.data, X.indptr, X.indices, y
             )
         else:
+            datafit.initialize(X, y)
             lipschitz = datafit.get_global_lipschitz(X, y)

         for n_iter in range(self.max_iter):
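
Since `_solve` now initializes the datafit before asking it for a global Lipschitz constant, FISTA can be called directly on plain objects with no manual setup. A minimal sketch (problem sizes and `alpha` are illustrative):

import numpy as np
from skglm.datafits import Quadratic
from skglm.penalties import L1
from skglm.solvers import FISTA
from skglm.utils.data import make_correlated_data

X, y, _ = make_correlated_data(n_samples=200, n_features=50, random_state=0)
alpha = 0.1 * np.max(np.abs(X.T @ y)) / len(y)

# no compiled_clone and no manual datafit.initialize: the solver handles both
w, p_objs, stop_crit = FISTA(max_iter=500, tol=1e-6).solve(X, y, Quadratic(), L1(alpha))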

skglm/solvers/group_prox_newton.py (+7)

@@ -69,6 +69,13 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         stop_crit = 0.
         p_objs_out = []

+        # TODO: to be isolated in a seperated method
+        is_sparse = issparse(X)
+        if is_sparse:
+            datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
+        else:
+            datafit.initialize(X, y)
+
         for iter in range(self.max_iter):
             grad = _construct_grad(X, y, w, Xw, datafit, all_groups)

skglm/solvers/lbfgs.py (+13 -6)

@@ -38,6 +38,13 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False):

     def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):

+        # TODO: to be isolated in a seperated method
+        is_sparse = issparse(X)
+        if is_sparse:
+            datafit.initialize_sparse(X.data, X.indptr, X.indices, y)
+        else:
+            datafit.initialize(X, y)
+
         def objective(w):
             Xw = X @ w
             datafit_value = datafit.value(y, w, Xw)
@@ -70,8 +77,7 @@ def callback_post_iter(w_k):

             it = len(p_objs_out)
             print(
-                f"Iteration {it}: {p_obj:.10f}, "
-                f"stopping crit: {stop_crit:.2e}"
+                f"Iteration {it}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}"
             )

         n_features = X.shape[1]
@@ -87,7 +93,7 @@ def callback_post_iter(w_k):
             options=dict(
                 maxiter=self.max_iter,
                 gtol=self.tol,
-                ftol=0.  # set ftol=0. to control convergence using only gtol
+                ftol=0.0,  # set ftol=0. to control convergence using only gtol
             ),
             callback=callback_post_iter,
         )
@@ -97,7 +103,7 @@ def callback_post_iter(w_k):
                 f"`LBFGS` did not converge for tol={self.tol:.3e} "
                 f"and max_iter={self.max_iter}.\n"
                 "Consider increasing `max_iter` and/or `tol`.",
-                category=ConvergenceWarning
+                category=ConvergenceWarning,
             )

         w = result.x
@@ -110,7 +116,8 @@ def custom_checks(self, X, y, datafit, penalty):
     def custom_checks(self, X, y, datafit, penalty):
         # check datafit support sparse data
         check_attrs(
-            datafit, solver=self,
+            datafit,
+            solver=self,
             required_attr=self._datafit_required_attr,
-            support_sparse=issparse(X)
+            support_sparse=issparse(X),
         )
