From 4facf2460c4c7be3ecade8e23ebf736c3d77fff1 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 18 Sep 2024 19:02:15 +0200 Subject: [PATCH 1/8] add debug script, prints, and use smaller stepsizes --- debug.py | 48 +++++++++++++++++++++++++++++++++++ skglm/experimental/pdcd_ws.py | 24 ++++++++++++------ 2 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 debug.py diff --git a/debug.py b/debug.py new file mode 100644 index 000000000..7f8b76eb1 --- /dev/null +++ b/debug.py @@ -0,0 +1,48 @@ +import numpy as np +from skglm import GeneralizedLinearEstimator +from skglm.experimental.pdcd_ws import PDCD_WS +from skglm.experimental.quantile_regression import Pinball +from skglm.penalties import L1 +from sklearn.datasets import make_regression +from sklearn.preprocessing import StandardScaler +from skglm.utils.jit_compilation import compiled_clone + + +def generate_dummy_data(n_samples=1000, n_features=10, noise=0.1): + X, y = make_regression(n_samples=n_samples, n_features=n_features, noise=noise) + return X, y + + +np.random.seed(42) + +datafit = Pinball(0.5) +penalty = L1(alpha=0.1) +solver = PDCD_WS( + max_iter=10, + max_epochs=100, + tol=1e-2, + warm_start=False, + verbose=1, +) + +# estimator = GeneralizedLinearEstimator( +# datafit=datafit, +# penalty=penalty, +# solver=solver, +# ) + +X, y = generate_dummy_data( + n_samples=1000, # if this is reduced to 100 samples, it converges + n_features=10, +) +# y -= y.mean() +# y += 0.1 +scaler = StandardScaler() +X_scaled = scaler.fit_transform(X) + +df = compiled_clone(datafit) +pen = compiled_clone(penalty) + +res = solver.solve(X, y, df, pen) + +# estimator.fit(X, y) diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py index 5ef49e5d4..b93ba8223 100644 --- a/skglm/experimental/pdcd_ws.py +++ b/skglm/experimental/pdcd_ws.py @@ -84,7 +84,7 @@ class PDCD_WS(BaseSolver): def __init__( self, max_iter=1000, max_epochs=1000, dual_init=None, p0=100, tol=1e-6, - fit_intercept=False, warm_start=True, verbose=False + fit_intercept=False, warm_start=True, verbose=0 ): self.max_iter = max_iter self.max_epochs = max_epochs @@ -102,8 +102,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): # Despite violating the conditions mentioned in [1] # this choice of steps yield in practice a convergent algorithm # with better speed of convergence - dual_step = 1 / norm(X, ord=2) - primal_steps = 1 / norm(X, axis=0, ord=2) + dual_step = 1 / norm(X, ord=2) / 10 + primal_steps = 1 / norm(X, axis=0, ord=2) / 10 # primal vars w = np.zeros(n_features) if w_init is None else w_init @@ -148,9 +148,13 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): # solve sub problem # inplace update of w, Xw, z, z_bar + if iteration == 0: + ep = 500 + else: + ep = self.max_epochs PDCD_WS._solve_subproblem( y, X, w, Xw, z, z_bar, datafit, penalty, - primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit) + primal_steps, dual_step, ws, ep, tol_in=0.3*stop_crit, verbose=self.verbose-1) current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) p_objs.append(current_p_obj) @@ -166,8 +170,9 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): @staticmethod @njit - def _solve_subproblem(y, X, w, Xw, z, z_bar, datafit, penalty, - primal_steps, dual_step, ws, max_epochs, tol_in): + def _solve_subproblem( + y, X, w, Xw, z, z_bar, datafit, penalty, primal_steps, + dual_step, ws, max_epochs, tol_in, verbose): n_features = X.shape[1] for epoch in range(max_epochs): @@ -191,12 +196,15 @@ def 
_solve_subproblem(y, X, w, Xw, z, z_bar, datafit, penalty, z += (z_bar - z) / n_features # check convergence using fixed-point criteria on both dual and primal - if epoch % 10 == 0: + if epoch % 1 == 0: opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws) opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step) stop_crit_in = max(max(opts_primal_in), opt_dual_in) - + if verbose: + print(f' epoch {epoch}, inner stopping crit: ', stop_crit_in) + print(opt_dual_in) + print(opts_primal_in) if stop_crit_in <= tol_in: break From d25d9fd8e2b62d8b2598a2ffd4111309eb6b8162 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 18 Sep 2024 19:06:30 +0200 Subject: [PATCH 2/8] no scale --- debug.py | 7 ++++--- skglm/experimental/pdcd_ws.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/debug.py b/debug.py index 7f8b76eb1..be293c480 100644 --- a/debug.py +++ b/debug.py @@ -18,8 +18,8 @@ def generate_dummy_data(n_samples=1000, n_features=10, noise=0.1): datafit = Pinball(0.5) penalty = L1(alpha=0.1) solver = PDCD_WS( - max_iter=10, - max_epochs=100, + max_iter=50, + max_epochs=500, tol=1e-2, warm_start=False, verbose=1, @@ -33,10 +33,11 @@ def generate_dummy_data(n_samples=1000, n_features=10, noise=0.1): X, y = generate_dummy_data( n_samples=1000, # if this is reduced to 100 samples, it converges - n_features=10, + n_features=11, ) # y -= y.mean() # y += 0.1 +y /= 10 scaler = StandardScaler() X_scaled = scaler.fit_transform(X) diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py index b93ba8223..f6d778788 100644 --- a/skglm/experimental/pdcd_ws.py +++ b/skglm/experimental/pdcd_ws.py @@ -102,8 +102,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): # Despite violating the conditions mentioned in [1] # this choice of steps yield in practice a convergent algorithm # with better speed of convergence - dual_step = 1 / norm(X, ord=2) / 10 - primal_steps = 1 / norm(X, axis=0, ord=2) / 10 + dual_step = 1 / norm(X, ord=2) + primal_steps = 1 / norm(X, axis=0, ord=2) # primal vars w = np.zeros(n_features) if w_init is None else w_init From f59c9cbde071ba75483868fb980842e83ab62735 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 21 Sep 2024 19:40:25 +0200 Subject: [PATCH 3/8] fix stepsizes & add comment --- debug.py | 61 +++++++++++++++++++++++------------ skglm/experimental/pdcd_ws.py | 57 +++++++++++++++++++------------- 2 files changed, 76 insertions(+), 42 deletions(-) diff --git a/debug.py b/debug.py index be293c480..5daa97c46 100644 --- a/debug.py +++ b/debug.py @@ -1,3 +1,4 @@ +# %% import numpy as np from skglm import GeneralizedLinearEstimator from skglm.experimental.pdcd_ws import PDCD_WS @@ -6,44 +7,64 @@ from sklearn.datasets import make_regression from sklearn.preprocessing import StandardScaler from skglm.utils.jit_compilation import compiled_clone +from sklearn.linear_model import QuantileRegressor def generate_dummy_data(n_samples=1000, n_features=10, noise=0.1): X, y = make_regression(n_samples=n_samples, n_features=n_features, noise=noise) + # y -= y.mean() + # y += 0.1 + y /= 10 return X, y np.random.seed(42) -datafit = Pinball(0.5) -penalty = L1(alpha=0.1) +quantile_level = 0.5 +alpha = 0.1 + +X, y = generate_dummy_data( + n_samples=1000, # if this is reduced to 100 samples, it converges + n_features=11, +) + solver = PDCD_WS( + p0=11, max_iter=50, max_epochs=500, - tol=1e-2, + tol=1e-5, warm_start=False, - verbose=1, + verbose=2, ) -# estimator = GeneralizedLinearEstimator( -# datafit=datafit, -# 
penalty=penalty,
-#     solver=solver,
-# )
-
-X, y = generate_dummy_data(
-    n_samples=1000,  # if this is reduced to 100 samples, it converges
-    n_features=11,
-)
-# y -= y.mean()
-# y += 0.1
-y /= 10
-scaler = StandardScaler()
-X_scaled = scaler.fit_transform(X)
+datafit = Pinball(quantile_level)
+penalty = L1(alpha=alpha)
 
 df = compiled_clone(datafit)
 pen = compiled_clone(penalty)
 
 res = solver.solve(X, y, df, pen)
 
-# estimator.fit(X, y)
+# %%
+
+clf = QuantileRegressor(
+    quantile=quantile_level,
+    alpha=alpha/len(y),
+    fit_intercept=False,
+    solver='highs',
+).fit(X, y)
+
+# %%
+print("diff solution:", np.linalg.norm(clf.coef_ - res[0]))
+
+# %%
+
+
+def obj_val(w):
+    return df.value(y, w, X @ w) + pen.value(w)
+
+
+for label, w in zip(("skglm", "sklearn"), (res[0], clf.coef_)):
+    print(f"{label:10} {obj_val(w)=}")
+
+# %%
diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py
index f6d778788..ab5c39369 100644
--- a/skglm/experimental/pdcd_ws.py
+++ b/skglm/experimental/pdcd_ws.py
@@ -99,11 +99,22 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         n_samples, n_features = X.shape
 
         # init steps
-        # Despite violating the conditions mentioned in [1]
-        # this choice of steps yield in practice a convergent algorithm
-        # with better speed of convergence
-        dual_step = 1 / norm(X, ord=2)
-        primal_steps = 1 / norm(X, axis=0, ord=2)
+        # choose steps that satisfy the condition of Assumption 2.1 e)
+        scale = np.sqrt(2 * n_features)
+        dual_steps = 1 / (norm(X, ord=2, axis=1) * scale)
+        primal_steps = 1 / ((dual_steps[:, None] * (X ** 2)).sum(axis=0) * scale)
+
+        # NOTE: primal and dual steps still satisfy the step-size condition
+        # when multiplied/divided by an arbitrary positive constant
+        # HACK: balance primal and dual variables: take bigger steps
+        # in the space with the higher number of variables
+        ratio = n_samples / n_features
+        if n_samples > n_features:
+            dual_steps *= ratio
+            primal_steps /= ratio
+        else:
+            dual_steps /= ratio
+            primal_steps *= ratio
 
         # primal vars
         w = np.zeros(n_features) if w_init is None else w_init
@@ -125,7 +136,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
             # check convergence using fixed-point criteria on both dual and primal
             opts_primal = _scores_primal(X, w, z, penalty, primal_steps, all_features)
-            opt_dual = _score_dual(y, z, Xw, datafit, dual_step)
+            opt_dual = _score_dual(y, z, Xw, datafit, dual_steps)
 
             stop_crit = max(max(opts_primal), opt_dual)
 
@@ -148,13 +159,9 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
             # solve sub problem
             # inplace update of w, Xw, z, z_bar
-            if iteration == 0:
-                ep = 500
-            else:
-                ep = self.max_epochs
             PDCD_WS._solve_subproblem(
                 y, X, w, Xw, z, z_bar, datafit, penalty,
-                primal_steps, dual_step, ws, ep, tol_in=0.3*stop_crit, verbose=self.verbose-1)
+                primal_steps, dual_steps, ws, self.max_epochs, tol_in=0.3*stop_crit, verbose=self.verbose-1)
 
             current_p_obj = datafit.value(y, w, Xw) + penalty.value(w)
             p_objs.append(current_p_obj)
@@ -172,7 +179,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
     @njit
     def _solve_subproblem(
             y, X, w, Xw, z, z_bar, datafit, penalty, primal_steps,
-            dual_step, ws, max_epochs, tol_in, verbose):
+            dual_steps, ws, max_epochs, tol_in, verbose):
         n_features = X.shape[1]
 
         for epoch in range(max_epochs):
@@ -196,20 +203,26 @@ def _solve_subproblem(
                 Xw += delta_w_j * X[:, j]
 
             # update dual
-            z_bar[:] = datafit.prox_conjugate(z + dual_step * Xw,
-                                              dual_step, y)
+            z_bar[:] = datafit.prox_conjugate(z + dual_steps * Xw,
+                                              dual_steps, y)
             z 
+= (z_bar - z) / n_features
 
             # check convergence using fixed-point criteria on both dual and primal
             if epoch % 1 == 0:
                 opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws)
-                opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step)
+                opt_dual_in = _score_dual(y, z, Xw, datafit, dual_steps)
 
                 stop_crit_in = max(max(opts_primal_in), opt_dual_in)
-                if verbose:
-                    print(f'  epoch {epoch}, inner stopping crit: ', stop_crit_in)
-                    print(opt_dual_in)
-                    print(opts_primal_in)
+                # if verbose:
+                #     current_p_obj = datafit.value(y, w, X@w) + penalty.value(w)
+                #     print(
+                #         f"|----- epoch {epoch+1}: {current_p_obj:.10f}, "
+                #         f"opt primal: {max(opts_primal_in):.2e}, opt dual: {opt_dual_in:.2e}")
+
+                #     print(f'  epoch {epoch}, inner stopping crit: ', stop_crit_in)
+                #     # print(opt_dual_in)
+                #     # print(opts_primal_in)
+
                 if stop_crit_in <= tol_in:
                     break
@@ -228,7 +241,7 @@ def _scores_primal(X, w, z, penalty, primal_steps, ws):
 
 @njit
-def _score_dual(y, z, Xw, datafit, dual_step):
-    next_z = datafit.prox_conjugate(z + dual_step * Xw,
-                                    dual_step, y)
+def _score_dual(y, z, Xw, datafit, dual_steps):
+    next_z = datafit.prox_conjugate(z + dual_steps * Xw,
+                                    dual_steps, y)
     return norm(z - next_z, ord=np.inf)

From fba2d04ccb6ff42629c94386c77e5bc54f74ce58 Mon Sep 17 00:00:00 2001
From: Badr-MOUFAD
Date: Sat, 21 Sep 2024 20:08:48 +0200
Subject: [PATCH 4/8] more on steps

---
 skglm/experimental/pdcd_ws.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py
index ab5c39369..3ed0a5827 100644
--- a/skglm/experimental/pdcd_ws.py
+++ b/skglm/experimental/pdcd_ws.py
@@ -101,8 +101,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
         # init steps
         # choose steps that satisfy the condition of Assumption 2.1 e)
         scale = np.sqrt(2 * n_features)
-        dual_steps = 1 / (norm(X, ord=2, axis=1) * scale)
-        primal_steps = 1 / ((dual_steps[:, None] * (X ** 2)).sum(axis=0) * scale)
+        dual_steps = 1 / (norm(X, ord=2) * scale)
+        primal_steps = 1 / (norm(X, axis=0, ord=2) * scale)

From f721d7ef210959c23b2143295900a456611d0e03 Mon Sep 17 00:00:00 2001
From: Badr-MOUFAD
Date: Sat, 21 Sep 2024 20:49:12 +0200
Subject: [PATCH 5/8] dual_steps ---> dual_step

---
 skglm/experimental/pdcd_ws.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py
index 3ed0a5827..6f6ead0c9 100644
--- a/skglm/experimental/pdcd_ws.py
+++ b/skglm/experimental/pdcd_ws.py
@@ -101,7 +101,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
         # init steps
         # choose steps that satisfy the condition of Assumption 2.1 e)
         scale = np.sqrt(2 * n_features)
-        dual_steps = 1 / (norm(X, ord=2) * scale)
+        dual_step = 1 / (norm(X, ord=2) * scale)
         primal_steps = 1 / (norm(X, axis=0, ord=2) * scale)
 
         # NOTE: primal and dual steps still satisfy the step-size condition
@@ -110,10 +110,10 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         # in the space with the higher number of variables
         ratio = n_samples / n_features
         if n_samples > n_features:
-            dual_steps *= ratio
+            dual_step *= ratio
             primal_steps /= ratio
         else:
-            dual_steps /= ratio
+            dual_step /= ratio
             primal_steps *= ratio
 
         # primal vars
         w = np.zeros(n_features) if w_init is None else w_init
@@ -136,7 +136,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
 
             # check convergence using fixed-point criteria on both dual and 
primal opts_primal = _scores_primal(X, w, z, penalty, primal_steps, all_features) - opt_dual = _score_dual(y, z, Xw, datafit, dual_steps) + opt_dual = _score_dual(y, z, Xw, datafit, dual_step) stop_crit = max(max(opts_primal), opt_dual) @@ -161,7 +161,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): # inplace update of w, Xw, z, z_bar PDCD_WS._solve_subproblem( y, X, w, Xw, z, z_bar, datafit, penalty, - primal_steps, dual_steps, ws, self.max_epochs, tol_in=0.3*stop_crit, verbose=self.verbose-1) + primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit, verbose=self.verbose-1) current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) p_objs.append(current_p_obj) @@ -179,7 +179,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): @njit def _solve_subproblem( y, X, w, Xw, z, z_bar, datafit, penalty, primal_steps, - dual_steps, ws, max_epochs, tol_in, verbose): + dual_step, ws, max_epochs, tol_in, verbose): n_features = X.shape[1] for epoch in range(max_epochs): @@ -198,14 +198,14 @@ def _solve_subproblem( Xw += delta_w_j * X[:, j] # update dual - z_bar[:] = datafit.prox_conjugate(z + dual_steps * Xw, - dual_steps, y) + z_bar[:] = datafit.prox_conjugate(z + dual_step * Xw, + dual_step, y) z += (z_bar - z) / n_features # check convergence using fixed-point criteria on both dual and primal if epoch % 1 == 0: opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws) - opt_dual_in = _score_dual(y, z, Xw, datafit, dual_steps) + opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step) stop_crit_in = max(max(opts_primal_in), opt_dual_in) # if verbose: @@ -241,7 +241,7 @@ def _scores_primal(X, w, z, penalty, primal_steps, ws): @njit -def _score_dual(y, z, Xw, datafit, dual_steps): - next_z = datafit.prox_conjugate(z + dual_steps * Xw, - dual_steps, y) +def _score_dual(y, z, Xw, datafit, dual_step): + next_z = datafit.prox_conjugate(z + dual_step * Xw, + dual_step, y) return norm(z - next_z, ord=np.inf) From 4dd48a0939bc5eba1eb7e924be77e0f7810af42d Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 21 Sep 2024 20:49:36 +0200 Subject: [PATCH 6/8] add to unittest --- .../experimental/tests/test_quantile_regression.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/skglm/experimental/tests/test_quantile_regression.py b/skglm/experimental/tests/test_quantile_regression.py index f4d1aa914..73203f5fd 100644 --- a/skglm/experimental/tests/test_quantile_regression.py +++ b/skglm/experimental/tests/test_quantile_regression.py @@ -12,9 +12,10 @@ from sklearn.linear_model import QuantileRegressor -@pytest.mark.parametrize('quantile_level', [0.3, 0.5, 0.7]) -def test_PDCD_WS(quantile_level): - n_samples, n_features = 50, 10 +@pytest.mark.parametrize('quantile_level,n_samples,n_features', + ([[0.3, 50, 20], [0.5, 1000, 11], [0.7, 50, 100]]) + ) +def test_PDCD_WS(quantile_level, n_samples, n_features): X, y, _ = make_correlated_data(n_samples, n_features, random_state=123) # optimality condition for w = 0. 
@@ -26,9 +27,7 @@ def test_PDCD_WS(quantile_level): datafit = compiled_clone(Pinball(quantile_level)) penalty = compiled_clone(L1(alpha)) - w = PDCD_WS( - dual_init=np.sign(y)/2 + (quantile_level - 0.5) - ).solve(X, y, datafit, penalty)[0] + w = PDCD_WS(tol=1e-9).solve(X, y, datafit, penalty)[0] clf = QuantileRegressor( quantile=quantile_level, @@ -38,7 +37,7 @@ def test_PDCD_WS(quantile_level): ).fit(X, y) np.testing.assert_allclose(w, clf.coef_, atol=1e-5) - # test compatibility when inside GLM: + # unrelated: test compatibility when inside GLM: estimator = GeneralizedLinearEstimator( datafit=Pinball(.2), penalty=L1(alpha=1.), From 171b8e61926f078ed7ffbfa5ab26a13bc2e12d52 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 21 Sep 2024 20:50:50 +0200 Subject: [PATCH 7/8] clean ups --- skglm/experimental/pdcd_ws.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py index 6f6ead0c9..c6a466926 100644 --- a/skglm/experimental/pdcd_ws.py +++ b/skglm/experimental/pdcd_ws.py @@ -161,7 +161,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): # inplace update of w, Xw, z, z_bar PDCD_WS._solve_subproblem( y, X, w, Xw, z, z_bar, datafit, penalty, - primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit, verbose=self.verbose-1) + primal_steps, dual_step, ws, self.max_epochs, tol_in=0.3*stop_crit) current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) p_objs.append(current_p_obj) @@ -179,7 +179,7 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): @njit def _solve_subproblem( y, X, w, Xw, z, z_bar, datafit, penalty, primal_steps, - dual_step, ws, max_epochs, tol_in, verbose): + dual_step, ws, max_epochs, tol_in): n_features = X.shape[1] for epoch in range(max_epochs): @@ -208,15 +208,6 @@ def _solve_subproblem( opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step) stop_crit_in = max(max(opts_primal_in), opt_dual_in) - # if verbose: - # current_p_obj = datafit.value(y, w, X@w) + penalty.value(w) - # print( - # f"|----- epoch {epoch+1}: {current_p_obj:.10f}, " - # f"opt primal: {max(opts_primal_in):.2e}, opt dual: {opt_dual_in:.2e}") - - # print(f' epoch {epoch}, inner stopping crit: ', stop_crit_in) - # # print(opt_dual_in) - # # print(opts_primal_in) if stop_crit_in <= tol_in: break From 709de9584e71f8ce925ed99ba6d1298447638cc2 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 21 Sep 2024 20:53:49 +0200 Subject: [PATCH 8/8] more on clean ups --- skglm/experimental/pdcd_ws.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py index c6a466926..724bb3fb4 100644 --- a/skglm/experimental/pdcd_ws.py +++ b/skglm/experimental/pdcd_ws.py @@ -177,9 +177,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): @staticmethod @njit - def _solve_subproblem( - y, X, w, Xw, z, z_bar, datafit, penalty, primal_steps, - dual_step, ws, max_epochs, tol_in): + def _solve_subproblem(y, X, w, Xw, z, z_bar, datafit, penalty, + primal_steps, dual_step, ws, max_epochs, tol_in): n_features = X.shape[1] for epoch in range(max_epochs): @@ -203,7 +202,7 @@ def _solve_subproblem( z += (z_bar - z) / n_features # check convergence using fixed-point criteria on both dual and primal - if epoch % 1 == 0: + if epoch % 10 == 0: opts_primal_in = _scores_primal(X, w, z, penalty, primal_steps, ws) opt_dual_in = _score_dual(y, z, Xw, datafit, dual_step)
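
A note on the step-size choice introduced in patches 3-5: below is a minimal
standalone sketch (not part of the patches; plain NumPy, with the problem sizes
picked arbitrarily to mirror debug.py) that checks the invariant the NOTE
comment relies on. With scale = sqrt(2 * n_features), the product
primal_steps[j] * dual_step * 2 * n_features * ||X||_2 * ||X_j||_2 equals 1 for
every feature j, and the primal/dual rebalancing multiplies one step and
divides the other by the same ratio, so this product is unchanged. The exact
constant required by Assumption 2.1 e) should be checked against reference [1]
cited in pdcd_ws.py.

import numpy as np
from numpy.linalg import norm

rng = np.random.default_rng(0)
n_samples, n_features = 1000, 11  # arbitrary sizes, mirroring debug.py
X = rng.standard_normal((n_samples, n_features))

# steps as set after patch 5
scale = np.sqrt(2 * n_features)
dual_step = 1 / (norm(X, ord=2) * scale)          # spectral norm of X
primal_steps = 1 / (norm(X, axis=0, ord=2) * scale)  # per-column norms

# rebalancing hack: take bigger steps in the space with more variables
ratio = n_samples / n_features
if n_samples > n_features:
    dual_step *= ratio
    primal_steps /= ratio
else:
    dual_step /= ratio
    primal_steps *= ratio

# the product is invariant under the rebalancing and equals exactly 1
products = (primal_steps * dual_step * 2 * n_features
            * norm(X, ord=2) * norm(X, axis=0, ord=2))
assert np.allclose(products, 1.0)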