diff --git a/debug.py b/debug.py
new file mode 100644
index 000000000..5daa97c46
--- /dev/null
+++ b/debug.py
@@ -0,0 +1,70 @@
+# %%
+import numpy as np
+from skglm import GeneralizedLinearEstimator
+from skglm.experimental.pdcd_ws import PDCD_WS
+from skglm.experimental.quantile_regression import Pinball
+from skglm.penalties import L1
+from sklearn.datasets import make_regression
+from sklearn.preprocessing import StandardScaler
+from skglm.utils.jit_compilation import compiled_clone
+from sklearn.linear_model import QuantileRegressor
+
+
+def generate_dummy_data(n_samples=1000, n_features=10, noise=0.1):
+    X, y = make_regression(n_samples=n_samples, n_features=n_features, noise=noise)
+    # y -= y.mean()
+    # y += 0.1
+    y /= 10
+    return X, y
+
+
+np.random.seed(42)
+
+quantile_level = 0.5
+alpha = 0.1
+
+X, y = generate_dummy_data(
+    n_samples=1000,  # if this is reduced to 100 samples, the solver converges
+    n_features=11,
+)
+
+solver = PDCD_WS(
+    p0=11,
+    max_iter=50,
+    max_epochs=500,
+    tol=1e-5,
+    warm_start=False,
+    verbose=2,
+)
+
+datafit = Pinball(quantile_level)
+penalty = L1(alpha=alpha)
+
+df = compiled_clone(datafit)
+pen = compiled_clone(penalty)
+
+res = solver.solve(X, y, df, pen)
+
+# %%
+
+clf = QuantileRegressor(
+    quantile=quantile_level,
+    alpha=alpha / len(y),
+    fit_intercept=False,
+    solver='highs',
+).fit(X, y)
+
+# %%
+print("diff solution:", np.linalg.norm(clf.coef_ - res[0]))
+
+# %%
+
+
+def obj_val(w):
+    return df.value(y, w, X @ w) + pen.value(w)
+
+
+for label, w in zip(("skglm", "sklearn"), (res[0], clf.coef_)):
+    print(f"{label:10} {obj_val(w)=}")
+
+# %%
diff --git a/skglm/experimental/pdcd_ws.py b/skglm/experimental/pdcd_ws.py
index 5ef49e5d4..724bb3fb4 100644
--- a/skglm/experimental/pdcd_ws.py
+++ b/skglm/experimental/pdcd_ws.py
@@ -84,7 +84,7 @@ class PDCD_WS(BaseSolver):
     def __init__(
         self, max_iter=1000, max_epochs=1000, dual_init=None, p0=100, tol=1e-6,
-        fit_intercept=False, warm_start=True, verbose=False
+        fit_intercept=False, warm_start=True, verbose=0
     ):
         self.max_iter = max_iter
         self.max_epochs = max_epochs
@@ -99,11 +99,22 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
         n_samples, n_features = X.shape
 
         # init steps
-        # Despite violating the conditions mentioned in [1]
-        # this choice of steps yield in practice a convergent algorithm
-        # with better speed of convergence
-        dual_step = 1 / norm(X, ord=2)
-        primal_steps = 1 / norm(X, axis=0, ord=2)
+        # choose steps that satisfy the step-size condition of Assumption 2.1 e)
+        scale = np.sqrt(2 * n_features)
+        dual_step = 1 / (norm(X, ord=2) * scale)
+        primal_steps = 1 / (norm(X, axis=0, ord=2) * scale)
+
+        # NOTE: the steps still satisfy the condition when multiplied/divided
+        # by an arbitrary positive constant
+        # HACK: balance primal and dual variables: take bigger steps
+        # in the space with the larger number of variables
+        ratio = n_samples / n_features
+        if n_samples > n_features:
+            dual_step *= ratio
+            primal_steps /= ratio
+        else:
+            dual_step /= ratio
+            primal_steps *= ratio
 
         # primal vars
         w = np.zeros(n_features) if w_init is None else w_init
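A quick standalone check of the NOTE above, outside the patch: the multiply/divide trick leaves every per-coordinate product primal_steps[j] * dual_step unchanged, which is the quantity the NOTE says the step-size condition constrains only up to a positive constant. A minimal sketch (the 1000 x 11 shape mirrors debug.py):

import numpy as np
from numpy.linalg import norm

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 11))  # same shape as the failing case in debug.py
n_samples, n_features = X.shape

# steps exactly as set in _solve above
scale = np.sqrt(2 * n_features)
dual_step = 1 / (norm(X, ord=2) * scale)
primal_steps = 1 / (norm(X, axis=0, ord=2) * scale)
products = primal_steps * dual_step  # per-coordinate products before rescaling

# the HACK rescaling: whatever multiplies one step divides the other
ratio = n_samples / n_features
if n_samples > n_features:
    dual_step *= ratio
    primal_steps /= ratio
else:
    dual_step /= ratio
    primal_steps *= ratio

# the products are unchanged, so per the NOTE the condition is preserved
np.testing.assert_allclose(primal_steps * dual_step, products)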
diff --git a/skglm/experimental/tests/test_quantile_regression.py b/skglm/experimental/tests/test_quantile_regression.py
index f4d1aa914..73203f5fd 100644
--- a/skglm/experimental/tests/test_quantile_regression.py
+++ b/skglm/experimental/tests/test_quantile_regression.py
@@ -12,9 +12,10 @@
 from sklearn.linear_model import QuantileRegressor
 
 
-@pytest.mark.parametrize('quantile_level', [0.3, 0.5, 0.7])
-def test_PDCD_WS(quantile_level):
-    n_samples, n_features = 50, 10
+@pytest.mark.parametrize('quantile_level,n_samples,n_features',
+                         [(0.3, 50, 20), (0.5, 1000, 11), (0.7, 50, 100)]
+                         )
+def test_PDCD_WS(quantile_level, n_samples, n_features):
     X, y, _ = make_correlated_data(n_samples, n_features, random_state=123)
 
     # optimality condition for w = 0.
@@ -26,9 +27,7 @@ def test_PDCD_WS(quantile_level):
     datafit = compiled_clone(Pinball(quantile_level))
     penalty = compiled_clone(L1(alpha))
 
-    w = PDCD_WS(
-        dual_init=np.sign(y)/2 + (quantile_level - 0.5)
-    ).solve(X, y, datafit, penalty)[0]
+    w = PDCD_WS(tol=1e-9).solve(X, y, datafit, penalty)[0]
 
     clf = QuantileRegressor(
         quantile=quantile_level,
@@ -38,7 +37,7 @@ def test_PDCD_WS(quantile_level):
     np.testing.assert_allclose(w, clf.coef_, atol=1e-5)
 
-    # test compatibility when inside GLM:
+    # unrelated: test compatibility when used inside GLM:
     estimator = GeneralizedLinearEstimator(
         datafit=Pinball(.2),
         penalty=L1(alpha=1.),
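Two side notes on the comparison setup. First, the alpha / len(y) handed to QuantileRegressor in debug.py: sklearn's QuantileRegressor minimizes the mean pinball loss plus alpha * ||w||_1, so dividing alpha by n_samples is the right rescaling if the skglm side optimizes an unnormalized sum; whether skglm's Pinball sums or averages should be verified against Pinball.value. A hypothetical reference objective for cross-checking (the helper name is ours, not skglm's):

import numpy as np


def sklearn_quantile_objective(w, X, y, quantile, alpha):
    # the objective sklearn's QuantileRegressor documents:
    # mean pinball loss + alpha * ||w||_1
    r = y - X @ w
    pinball = np.where(r >= 0, quantile * r, (quantile - 1) * r)
    return np.mean(pinball) + alpha * np.sum(np.abs(w))

Second, the new parametrization exercises both branches of the step-balancing HACK: (50, 20) and (1000, 11) hit n_samples > n_features, the latter matching the non-converging shape from debug.py, while (50, 100) hits the wide-data branch. All three cases run with: pytest skglm/experimental/tests/test_quantile_regression.py::test_PDCD_WS -v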