diff --git a/@FBCache/Backtrack_Gamma.m b/+forbes/+fbe/@FBCache/Backtrack_Gamma.m similarity index 100% rename from @FBCache/Backtrack_Gamma.m rename to +forbes/+fbe/@FBCache/Backtrack_Gamma.m diff --git a/@FBCache/Check_Gamma.m b/+forbes/+fbe/@FBCache/Check_Gamma.m similarity index 94% rename from @FBCache/Check_Gamma.m rename to +forbes/+fbe/@FBCache/Check_Gamma.m index e03fae4..bf042bb 100755 --- a/@FBCache/Check_Gamma.m +++ b/+forbes/+fbe/@FBCache/Check_Gamma.m @@ -18,7 +18,7 @@ function [flag, cache_pg] = Check_Gamma(cache, bet) z = cache.Get_ProxGradStep(); -cache_pg = FBCache(cache.prob, z, cache.gam, cache.ops); +cache_pg = forbes.fbe.FBCache(cache.prob, z, cache.gam, cache.ops); fz = cache_pg.Get_f(); % We should do the following, but maybe accessing the fields directly has diff --git a/@FBCache/Check_StoppingCriterion.m b/+forbes/+fbe/@FBCache/Check_StoppingCriterion.m similarity index 100% rename from @FBCache/Check_StoppingCriterion.m rename to +forbes/+fbe/@FBCache/Check_StoppingCriterion.m diff --git a/@FBCache/FBCache.m b/+forbes/+fbe/@FBCache/FBCache.m similarity index 100% rename from @FBCache/FBCache.m rename to +forbes/+fbe/@FBCache/FBCache.m diff --git a/@FBCache/Get_CacheLine.m b/+forbes/+fbe/@FBCache/Get_CacheLine.m similarity index 90% rename from @FBCache/Get_CacheLine.m rename to +forbes/+fbe/@FBCache/Get_CacheLine.m index 7e68b1b..0d83859 100755 --- a/@FBCache/Get_CacheLine.m +++ b/+forbes/+fbe/@FBCache/Get_CacheLine.m @@ -16,7 +16,7 @@ gam = cache.gam; if nargin < 4 - cachet = FBCache(cache.prob, cache.x + tau*cache.dir1, cache.gam, cache.ops); + cachet = forbes.fbe.FBCache(cache.prob, cache.x + tau*cache.dir1, cache.gam, cache.ops); end if nargin < 4 || ~cachet.flagGradStep @@ -33,9 +33,9 @@ if prob.istheref2 cachet.res2x = cache.res2x + tau*cache.C2dir1; if prob.useHessian - [f2xt, gradf2res2xt, cachet.Hessf2res2x] = prob.callf2(cachet.res2x); + [gradf2res2xt, f2xt, cachet.Hessf2res2x] = prob.f2.gradient(cachet.res2x); else - [f2xt, 
gradf2res2xt] = prob.callf2(cachet.res2x); + [gradf2res2xt, f2xt] = prob.f2.gradient(cachet.res2x); cachet.gradf2res2x = gradf2res2xt; end if cache.flagOps @@ -68,10 +68,10 @@ if nargin < 4 || ~cachet.flagProxGradStep if prob.isthereD mugam = prob.mu*gam; - [z, cachet.gz] = prob.callg(prob.D*cachet.y, mugam); + [z, cachet.gz] = prob.g.prox(prob.D*cachet.y, mugam); cachet.z = cachet.y + prob.D'*(z - prob.D*cachet.y)/prob.mu; else - [cachet.z, cachet.gz] = prob.callg(cachet.y, gam); + [cachet.z, cachet.gz] = prob.g.prox(cachet.y, gam); end if cache.flagOps cache.ops.addproxg(); diff --git a/@FBCache/Get_CacheSegment.m b/+forbes/+fbe/@FBCache/Get_CacheSegment.m similarity index 84% rename from @FBCache/Get_CacheSegment.m rename to +forbes/+fbe/@FBCache/Get_CacheSegment.m index 85aced9..45a304b 100644 --- a/@FBCache/Get_CacheSegment.m +++ b/+forbes/+fbe/@FBCache/Get_CacheSegment.m @@ -7,7 +7,7 @@ prob = cache.prob; gam = cache.gam; -cachet = FBCache(cache.prob, cache.x + tau*cache.dir1 + (1-tau)*cache.dir2, cache.gam, cache.ops); +cachet = forbes.fbe.FBCache(cache.prob, cache.x + tau*cache.dir1 + (1-tau)*cache.dir2, cache.gam, cache.ops); fxt = 0; gradfxt = 0; @@ -22,9 +22,9 @@ if prob.istheref2 cachet.res2x = cache.res2x + tau*cache.C2dir1 + (1-tau)*cache.C2dir2; if prob.useHessian - [f2xt, gradf2res2xt, cachet.Hessf2res2x] = prob.callf2(cachet.res2x); + [gradf2res2xt, f2xt, cachet.Hessf2res2x] = prob.f2.gradient(cachet.res2x); else - [f2xt, gradf2res2xt] = prob.callf2(cachet.res2x); + [gradf2res2xt, f2xt] = prob.f2.gradient(cachet.res2x); cachet.gradf2res2x = gradf2res2xt; end if cache.flagOps @@ -54,10 +54,10 @@ if prob.isthereD mugam = prob.mu*gam; - [z, cachet.gz] = prob.callg(prob.D*cachet.y, mugam); + [z, cachet.gz] = prob.g.prox(prob.D*cachet.y, mugam); cachet.z = cachet.y + prob.D'*(z - prob.D*cachet.y)/prob.mu; else - [cachet.z, cachet.gz] = prob.callg(cachet.y, gam); + [cachet.z, cachet.gz] = prob.g.prox(cachet.y, gam); end if cache.flagOps 
cache.ops.addproxg(); diff --git a/@FBCache/Get_FBE.m b/+forbes/+fbe/@FBCache/Get_FBE.m similarity index 100% rename from @FBCache/Get_FBE.m rename to +forbes/+fbe/@FBCache/Get_FBE.m diff --git a/@FBCache/Get_FPR.m b/+forbes/+fbe/@FBCache/Get_FPR.m similarity index 100% rename from @FBCache/Get_FPR.m rename to +forbes/+fbe/@FBCache/Get_FPR.m diff --git a/@FBCache/Get_Gamma.m b/+forbes/+fbe/@FBCache/Get_Gamma.m similarity index 100% rename from @FBCache/Get_Gamma.m rename to +forbes/+fbe/@FBCache/Get_Gamma.m diff --git a/@FBCache/Get_GradFBE.m b/+forbes/+fbe/@FBCache/Get_GradFBE.m similarity index 96% rename from @FBCache/Get_GradFBE.m rename to +forbes/+fbe/@FBCache/Get_GradFBE.m index 1a9299e..ffbe212 100755 --- a/@FBCache/Get_GradFBE.m +++ b/+forbes/+fbe/@FBCache/Get_GradFBE.m @@ -45,7 +45,7 @@ else % imaginary trick res2xepsFPR = cache.res2x + 1e-100i*C2FPR; - [~, gradf2res2xepsd] = prob.callf2(res2xepsFPR); + [gradf2res2xepsd, ~] = prob.f2.gradient(res2xepsFPR); if cache.flagOps, cache.ops.addgradf2(); end HC2FPR = imag(gradf2res2xepsd)/1e-100; % forward differences diff --git a/@FBCache/Get_GradStep.m b/+forbes/+fbe/@FBCache/Get_GradStep.m similarity index 100% rename from @FBCache/Get_GradStep.m rename to +forbes/+fbe/@FBCache/Get_GradStep.m diff --git a/@FBCache/Get_Gradf.m b/+forbes/+fbe/@FBCache/Get_Gradf.m similarity index 80% rename from @FBCache/Get_Gradf.m rename to +forbes/+fbe/@FBCache/Get_Gradf.m index fafe056..2a2fa62 100644 --- a/@FBCache/Get_Gradf.m +++ b/+forbes/+fbe/@FBCache/Get_Gradf.m @@ -28,18 +28,18 @@ if prob.istheref2 if prob.isthereC2 if prob.useHessian - [~, gradf2res2x, cache.Hessf2res2x] = prob.callf2(cache.res2x); + [gradf2res2x, ~, cache.Hessf2res2x] = prob.f2.gradient(cache.res2x); else - [~, gradf2res2x] = prob.callf2(cache.res2x); + [gradf2res2x, ~] = prob.f2.gradient(cache.res2x); cache.gradf2res2x = gradf2res2x; end cache.gradf2x = prob.C2'*gradf2res2x; if cache.flagOps, cache.ops.addC2(); end else if prob.useHessian - [~, 
gradf2res2x, cache.Hessf2res2x] = prob.callf2(cache.res2x); + [gradf2res2x, ~, cache.Hessf2res2x] = prob.f2.gradient(cache.res2x); else - [~, gradf2res2x] = prob.callf2(cache.res2x); + [gradf2res2x, ~] = prob.f2.gradient(cache.res2x); cache.gradf2res2x = gradf2res2x; end cache.gradf2x = gradf2res2x; diff --git a/@FBCache/Get_NormFPR.m b/+forbes/+fbe/@FBCache/Get_NormFPR.m similarity index 100% rename from @FBCache/Get_NormFPR.m rename to +forbes/+fbe/@FBCache/Get_NormFPR.m diff --git a/@FBCache/Get_Ops.m b/+forbes/+fbe/@FBCache/Get_Ops.m similarity index 100% rename from @FBCache/Get_Ops.m rename to +forbes/+fbe/@FBCache/Get_Ops.m diff --git a/@FBCache/Get_Point.m b/+forbes/+fbe/@FBCache/Get_Point.m similarity index 100% rename from @FBCache/Get_Point.m rename to +forbes/+fbe/@FBCache/Get_Point.m diff --git a/@FBCache/Get_Problem.m b/+forbes/+fbe/@FBCache/Get_Problem.m similarity index 100% rename from @FBCache/Get_Problem.m rename to +forbes/+fbe/@FBCache/Get_Problem.m diff --git a/@FBCache/Get_ProxGradStep.m b/+forbes/+fbe/@FBCache/Get_ProxGradStep.m similarity index 86% rename from @FBCache/Get_ProxGradStep.m rename to +forbes/+fbe/@FBCache/Get_ProxGradStep.m index 4c7fdea..405f600 100755 --- a/@FBCache/Get_ProxGradStep.m +++ b/+forbes/+fbe/@FBCache/Get_ProxGradStep.m @@ -20,10 +20,10 @@ if prob.isthereD mugam = prob.mu*gam; - [z, cache.gz] = prob.callg(prob.D*cache.y, mugam); + [z, cache.gz] = prob.g.prox(prob.D*cache.y, mugam); cache.z = cache.y + prob.D'*(z - prob.D*cache.y)/prob.mu; else - [cache.z, cache.gz] = prob.callg(cache.y, gam); + [cache.z, cache.gz] = prob.g.prox(cache.y, gam); end if cache.flagOps cache.ops.addproxg(); diff --git a/@FBCache/Get_ProxStep.m b/+forbes/+fbe/@FBCache/Get_ProxStep.m similarity index 100% rename from @FBCache/Get_ProxStep.m rename to +forbes/+fbe/@FBCache/Get_ProxStep.m diff --git a/@FBCache/Get_Slope.m b/+forbes/+fbe/@FBCache/Get_Slope.m similarity index 100% rename from @FBCache/Get_Slope.m rename to 
+forbes/+fbe/@FBCache/Get_Slope.m diff --git a/@FBCache/Get_f.m b/+forbes/+fbe/@FBCache/Get_f.m similarity index 82% rename from @FBCache/Get_f.m rename to +forbes/+fbe/@FBCache/Get_f.m index 6a2907d..95f6964 100755 --- a/@FBCache/Get_f.m +++ b/+forbes/+fbe/@FBCache/Get_f.m @@ -17,10 +17,10 @@ C1x = prob.C1*cache.x; if cache.flagOps, cache.ops.addC1(); end cache.res1x = C1x + prob.d1; - [cache.f1x, cache.gradf1res1x] = prob.callf1(cache.res1x); + [cache.gradf1res1x, cache.f1x] = prob.f1.gradient(cache.res1x); else cache.res1x = cache.x + prob.d1; - [cache.f1x, cache.gradf1res1x] = prob.callf1(cache.res1x); + [cache.gradf1res1x, cache.f1x] = prob.f1.gradient(cache.res1x); cache.gradf1x = cache.gradf1res1x; end if cache.flagOps @@ -36,10 +36,10 @@ C2x = prob.C2*cache.x; if cache.flagOps, cache.ops.addC2(); end cache.res2x = C2x + prob.d2; - f2x = prob.callf2(cache.res2x); + [~, f2x] = prob.f2.gradient(cache.res2x); else cache.res2x = cache.x + prob.d2; - f2x = prob.callf2(cache.res2x); + [~, f2x] = prob.f2.gradient(cache.res2x); end if cache.flagOps, cache.ops.addf2(); end cache.f2x = f2x; diff --git a/@FBCache/Get_g.m b/+forbes/+fbe/@FBCache/Get_g.m similarity index 100% rename from @FBCache/Get_g.m rename to +forbes/+fbe/@FBCache/Get_g.m diff --git a/@FBCache/Set_Directions.m b/+forbes/+fbe/@FBCache/Set_Directions.m similarity index 100% rename from @FBCache/Set_Directions.m rename to +forbes/+fbe/@FBCache/Set_Directions.m diff --git a/@FBCache/Set_Gamma.m b/+forbes/+fbe/@FBCache/Set_Gamma.m similarity index 100% rename from @FBCache/Set_Gamma.m rename to +forbes/+fbe/@FBCache/Set_Gamma.m diff --git a/@FBOperations/FBOperations.m b/+forbes/+fbe/@FBOperations/FBOperations.m similarity index 100% rename from @FBOperations/FBOperations.m rename to +forbes/+fbe/@FBOperations/FBOperations.m diff --git a/+forbes/+functions/@Conjugate/Conjugate.m b/+forbes/+functions/@Conjugate/Conjugate.m new file mode 100644 index 0000000..d8e09ad --- /dev/null +++ 
b/+forbes/+functions/@Conjugate/Conjugate.m @@ -0,0 +1,27 @@ +% CONJUGATE Fenchel conjugate function (of some other given function) + +classdef Conjugate < forbes.functions.Proximable + properties + f + end + methods + function obj = Conjugate(f) + obj.f = f; + end + function p = is_convex(obj) + p = true; + end + function p = is_strongly_convex(obj) + p = obj.f.is_smooth(); + end + function p = is_smooth(obj) + p = obj.f.is_strongly_convex(); + end + function p = is_quadratic(obj) + p = obj.f.is_strongly_convex() && obj.f.is_generalized_quadratic(); + end + function p = is_generalized_quadratic(obj) + p = obj.f.is_quadratic(); + end + end +end diff --git a/+forbes/+functions/@Conjugate/compute_gradient.m b/+forbes/+functions/@Conjugate/compute_gradient.m new file mode 100644 index 0000000..c65ca28 --- /dev/null +++ b/+forbes/+functions/@Conjugate/compute_gradient.m @@ -0,0 +1,3 @@ +function [g, v] = compute_gradient(obj, x) + [g, v] = obj.f.compute_gradient_conjugate(x); +end diff --git a/+forbes/+functions/@Conjugate/compute_prox.m b/+forbes/+functions/@Conjugate/compute_prox.m new file mode 100644 index 0000000..e370e15 --- /dev/null +++ b/+forbes/+functions/@Conjugate/compute_prox.m @@ -0,0 +1,8 @@ +function [p, v] = compute_prox(obj, x, gam) + % use Moreau identity: + % prox(x; gam, f*) = x - gam*prox(x/gam; 1/gam, f) + % + [p1, v1] = obj.f.prox(x/gam, 1/gam); + p = x - gam*p1; + v = x(:)'*p1(:) - gam*p1(:)'*p1(:) - v1; +end diff --git a/+forbes/+functions/@DistBoxL1/DistBoxL1.m b/+forbes/+functions/@DistBoxL1/DistBoxL1.m new file mode 100644 index 0000000..b8de0d9 --- /dev/null +++ b/+forbes/+functions/@DistBoxL1/DistBoxL1.m @@ -0,0 +1,19 @@ +% DISTBOXL1 L1 distance from a box + +classdef DistBoxL1 < forbes.functions.Proximable + properties + lb, ub % box bounds + w + end + methods + function obj = DistBoxL1(lb, ub, w) + if nargin < 3, w = 1.0; end + obj.lb = lb; + obj.ub = ub; + obj.w = w; + end + function p = is_convex(obj) + p = true; + end + end +end diff 
--git a/+forbes/+functions/@DistBoxL1/compute_prox.m b/+forbes/+functions/@DistBoxL1/compute_prox.m new file mode 100644 index 0000000..ef78729 --- /dev/null +++ b/+forbes/+functions/@DistBoxL1/compute_prox.m @@ -0,0 +1,13 @@ +function [p, v] = compute_prox(obj, x, gam) + mu = gam * obj.w; + p = max(x-obj.ub-mu, 0) - max(obj.lb-x-mu, 0) + min(max(x, obj.lb), obj.ub); + if nargout > 1 + proj = max(obj.lb, min(obj.ub, p)); + if isscalar(obj.w) + v = sum(obj.w*abs(p-proj)); + else + finw = ~isinf(obj.w); + v = sum(obj.w(finw).*abs(p(finw)-proj(finw))); + end + end +end diff --git a/+forbes/+functions/@DistL2/DistL2.m b/+forbes/+functions/@DistL2/DistL2.m new file mode 100644 index 0000000..91ecad0 --- /dev/null +++ b/+forbes/+functions/@DistL2/DistL2.m @@ -0,0 +1,18 @@ +% DISTL2 L2 (Euclidean) distance from a convex set + +classdef DistL2 < forbes.functions.Proximable + properties + ind % indicator function of the (convex) set + lam + end + methods + function obj = DistL2(ind, lam) + if nargin < 2, lam = 1.0; end + obj.ind = ind; + obj.lam = lam; + end + function p = is_convex(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@DistL2/compute_prox.m b/+forbes/+functions/@DistL2/compute_prox.m new file mode 100644 index 0000000..cd62f17 --- /dev/null +++ b/+forbes/+functions/@DistL2/compute_prox.m @@ -0,0 +1,13 @@ +function [y, v] = compute_prox(obj, x, gam) + p = obj.ind.compute_prox(x, 1.0); + d = norm(x-p, 'fro'); + gamlam = (gam*obj.lam); + if gamlam < d + gamlamd = gamlam/d; + y = (1-gamlamd)*x + gamlamd*p; + v = obj.lam*(d-gamlam); + else + y = p; + v = 0.0; + end +end diff --git a/+forbes/+functions/@EpiCompose/EpiCompose.m b/+forbes/+functions/@EpiCompose/EpiCompose.m new file mode 100644 index 0000000..c1e867e --- /dev/null +++ b/+forbes/+functions/@EpiCompose/EpiCompose.m @@ -0,0 +1,33 @@ +% EPICOMPOSE Epi-composition +% +% EPICOMPOSE(f, A) returns the function g(y) = inf {f(x) : Ax = y}. 
+ +classdef EpiCompose < forbes.functions.Proximable + properties + f, A + mu + flag % f is 1: Quadratic, 2: QuadraticOverAffine, 3: Anything else + Q, q, C, d % to store data of (generalized) quadratic functions + gam_prox, L_prox % to store Cholesky factor + x + end + methods + function obj = EpiCompose(f, A) + if isa(f, 'forbes.functions.Quadratic') || isa(f, 'Quadratic') % package classes must be referenced by their fully qualified name + obj.flag = 1; + obj.Q = f.Q; + obj.q = f.q; + obj.gam_prox = 0; + if isscalar(A), A = A*speye(size(f.Q, 2)); end + else + obj.flag = 3; + obj.mu = forbes.functions.Proximable.get_gram_diagonal(A'); % qualified: unqualified names are not resolved inside a package + if obj.mu == 0 + error('A must be s.t. A''*A = mu*Id'); + end + end + obj.f = f; + obj.A = A; + end + end +end diff --git a/+forbes/+functions/@EpiCompose/compute_prox.m b/+forbes/+functions/@EpiCompose/compute_prox.m new file mode 100644 index 0000000..c89eb54 --- /dev/null +++ b/+forbes/+functions/@EpiCompose/compute_prox.m @@ -0,0 +1,19 @@ +function [p, v] = compute_prox(obj, s, gam) + switch obj.flag + case 1 % f = Quadratic + if gam ~= obj.gam_prox % more robust test? + % factor matrix when gam changes (or at first call) + obj.gam_prox = gam; + obj.L_prox = chol(obj.Q + (1/gam)*obj.A'*obj.A); % do differently for sparse? + end + obj.x = obj.L_prox\(obj.L_prox'\((obj.A'*s)/gam - obj.q)); + v = 0.5*(obj.x'*(obj.Q*obj.x)) + obj.q'*obj.x; % can we save something here? 
+ case 2 % f = QuadraticOverAffine + error('not implemented'); + case 3 % f = Any proximable function and A'*A = mu*Id, mu > 0 + [obj.x, v] = obj.f.compute_prox(obj.A'*s/obj.mu, gam/obj.mu); + otherwise + error('not implemented'); + end + p = obj.A*obj.x; +end diff --git a/+forbes/+functions/@HingeLoss/HingeLoss.m b/+forbes/+functions/@HingeLoss/HingeLoss.m new file mode 100644 index 0000000..bd1034e --- /dev/null +++ b/+forbes/+functions/@HingeLoss/HingeLoss.m @@ -0,0 +1,18 @@ +% HINGELOSS Hinge loss function + +classdef HingeLoss < forbes.functions.Proximable + properties + mu, b + end + methods + function obj = HingeLoss(mu, b) + if nargin < 1, mu = 1.0; end + if nargin < 2, b = 1.0; end + obj.mu = mu; + obj.b = b; + end + function p = is_convex(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@HingeLoss/compute_prox.m b/+forbes/+functions/@HingeLoss/compute_prox.m new file mode 100644 index 0000000..e9e3191 --- /dev/null +++ b/+forbes/+functions/@HingeLoss/compute_prox.m @@ -0,0 +1,7 @@ +function [p, v] = compute_prox(obj, x, gam) + bx = obj.b .* x; + ind = bx < 1; + p(ind,1) = obj.b(ind) .* min(bx(ind)+gam*obj.mu,1); + p(~ind,1) = x(~ind); + v = obj.mu*sum(max(0,1-obj.b .* p)); +end diff --git a/+forbes/+functions/@HuberLoss/HuberLoss.m b/+forbes/+functions/@HuberLoss/HuberLoss.m new file mode 100644 index 0000000..cf66ab9 --- /dev/null +++ b/+forbes/+functions/@HuberLoss/HuberLoss.m @@ -0,0 +1,18 @@ +% HUBERLOSS Huber loss function + +classdef HuberLoss < forbes.functions.Proximable + properties + del % coefficient + end + methods + function obj = HuberLoss(del) + obj.del = del; + end + function p = is_convex(obj) + p = true; + end + function p = is_smooth(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@HuberLoss/compute_gradient.m b/+forbes/+functions/@HuberLoss/compute_gradient.m new file mode 100644 index 0000000..c08e6e9 --- /dev/null +++ b/+forbes/+functions/@HuberLoss/compute_gradient.m @@ -0,0 +1,13 @@ +function 
[grad, val] = compute_gradient(obj, x) + absx = abs(x); + small = absx <= obj.del; + large = ~small; + grad = zeros(length(x),1); % grad is the first output, so it is always computed + grad(small) = x(small)/obj.del; + grad(large) = sign(x(large)); + if nargout >= 2 % the function value is only evaluated when requested + sqx = (0.5/obj.del)*(x(small).^2); + linx = absx(large)-0.5*obj.del; + val = sum(sqx)+sum(linx); + end +end diff --git a/+forbes/+functions/@IndAffine/IndAffine.m b/+forbes/+functions/@IndAffine/IndAffine.m new file mode 100644 index 0000000..a8c4400 --- /dev/null +++ b/+forbes/+functions/@IndAffine/IndAffine.m @@ -0,0 +1,15 @@ +% INDAFFINE Indicator function of an affine subspace + +classdef IndAffine < forbes.functions.Proximable + properties + A, b % define the affine subspace + L_prox % stores the Cholesky factor needed to compute prox + end + methods + function obj = IndAffine(A, b) + obj.A = A; + obj.b = b; + obj.L_prox = []; + end + end +end diff --git a/+forbes/+functions/@IndAffine/compute_prox.m b/+forbes/+functions/@IndAffine/compute_prox.m new file mode 100644 index 0000000..988be8a --- /dev/null +++ b/+forbes/+functions/@IndAffine/compute_prox.m @@ -0,0 +1,8 @@ +function [p, v] = compute_prox(obj, x, gam) + if isempty(obj.L_prox) + obj.make_prox(); + end + res = obj.A*x - obj.b; + p = x - obj.A'*(obj.L_prox'\(obj.L_prox\res)); + v = 0.0; +end diff --git a/+forbes/+functions/@IndAffine/make_prox.m b/+forbes/+functions/@IndAffine/make_prox.m new file mode 100644 index 0000000..50b74fe --- /dev/null +++ b/+forbes/+functions/@IndAffine/make_prox.m @@ -0,0 +1,4 @@ +function make_prox(obj) + % can we make this more efficient, e.g., for sparse matrices? 
+ obj.L_prox = chol(obj.A*obj.A','lower'); +end diff --git a/+forbes/+functions/@IndBallL0/IndBallL0.m b/+forbes/+functions/@IndBallL0/IndBallL0.m new file mode 100644 index 0000000..31fda26 --- /dev/null +++ b/+forbes/+functions/@IndBallL0/IndBallL0.m @@ -0,0 +1,15 @@ +% INDBALLL0 Indicator function the L0 pseudo-norm ball + +classdef IndBallL0 < forbes.functions.Proximable + properties + N % ball radius + T % L-infinity threshold (to make the ball compact) + end + methods + function obj = IndBallL0(N, T) + if nargin < 2, T = Inf; end + obj.N = N; + obj.T = T; + end + end +end diff --git a/+forbes/+functions/@IndBallL0/compute_prox.m b/+forbes/+functions/@IndBallL0/compute_prox.m new file mode 100644 index 0000000..deb0760 --- /dev/null +++ b/+forbes/+functions/@IndBallL0/compute_prox.m @@ -0,0 +1,6 @@ +function [p, v] = compute_prox(obj, x, gam) + p = x; + [~, I] = sort(abs(p), 'descend'); + p(I(obj.N+1:end)) = 0; + v = 0; +end \ No newline at end of file diff --git a/+forbes/+functions/@IndBallL2/IndBallL2.m b/+forbes/+functions/@IndBallL2/IndBallL2.m new file mode 100644 index 0000000..fdb46f4 --- /dev/null +++ b/+forbes/+functions/@IndBallL2/IndBallL2.m @@ -0,0 +1,15 @@ +% INDBALLL2 Indicator function of the L2 (Euclidean) ball + +classdef IndBallL2 < forbes.functions.Proximable + properties + R % ball radius + end + methods + function obj = IndBallL2(R) + if nargin < 1 + R = 1.0; + end + obj.R = R; + end + end +end diff --git a/+forbes/+functions/@IndBallL2/compute_prox.m b/+forbes/+functions/@IndBallL2/compute_prox.m new file mode 100644 index 0000000..5c86799 --- /dev/null +++ b/+forbes/+functions/@IndBallL2/compute_prox.m @@ -0,0 +1,9 @@ +function [p, v] = compute_prox(obj, x, gam) + normx = norm(x); + if normx > obj.R + p = obj.R/normx * x; + else + p = x; + end + v = 0.0; +end diff --git a/+forbes/+functions/@IndBinary/IndBinary.m b/+forbes/+functions/@IndBinary/IndBinary.m new file mode 100644 index 0000000..af13d83 --- /dev/null +++ 
b/+forbes/+functions/@IndBinary/IndBinary.m @@ -0,0 +1,17 @@ +% INDBINARY Indicator function the set {low,high}^n + +classdef IndBinary < forbes.functions.Proximable + properties + low, high + end + methods + function obj = IndBinary(low, high) + if nargin == 0 + low = 0; + high = 1; + end + obj.low = low; + obj.high = high; + end + end +end diff --git a/+forbes/+functions/@IndBinary/compute_prox.m b/+forbes/+functions/@IndBinary/compute_prox.m new file mode 100644 index 0000000..dcad627 --- /dev/null +++ b/+forbes/+functions/@IndBinary/compute_prox.m @@ -0,0 +1,7 @@ +function [p, v] = compute_prox(obj, x, gam) + diff_low = abs(x - obj.low); + diff_high = abs(x - obj.high); + ind_low = diff_low <= diff_high; + p = ind_low .* obj.low + ~ind_low .* obj.high; + v = 0.0; +end diff --git a/+forbes/+functions/@IndBox/IndBox.m b/+forbes/+functions/@IndBox/IndBox.m new file mode 100644 index 0000000..1128b51 --- /dev/null +++ b/+forbes/+functions/@IndBox/IndBox.m @@ -0,0 +1,13 @@ +% INDBOX Indicator function of a box + +classdef IndBox < forbes.functions.Proximable + properties + lo, hi % box boundaries + end + methods + function obj = IndBox(lo, hi) + obj.lo = lo; + obj.hi = hi; + end + end +end diff --git a/+forbes/+functions/@IndBox/compute_prox.m b/+forbes/+functions/@IndBox/compute_prox.m new file mode 100644 index 0000000..c7add5b --- /dev/null +++ b/+forbes/+functions/@IndBox/compute_prox.m @@ -0,0 +1,4 @@ +function [p, v] = compute_prox(obj, x, gam) + p = min(obj.hi, max(obj.lo, x)); + v = 0; +end diff --git a/+forbes/+functions/@IndPoint/IndPoint.m b/+forbes/+functions/@IndPoint/IndPoint.m new file mode 100644 index 0000000..1b04183 --- /dev/null +++ b/+forbes/+functions/@IndPoint/IndPoint.m @@ -0,0 +1,12 @@ +% INDPOINT Indicator function of a singleton + +classdef IndPoint < forbes.functions.Proximable + properties + p + end + methods + function obj = IndPoint(p) + obj.p = p; + end + end +end diff --git a/+forbes/+functions/@IndPoint/compute_prox.m 
b/+forbes/+functions/@IndPoint/compute_prox.m new file mode 100644 index 0000000..e10986f --- /dev/null +++ b/+forbes/+functions/@IndPoint/compute_prox.m @@ -0,0 +1,4 @@ +function [p, v] = compute_prox(obj, ~, ~) + p = obj.p; + v = 0; +end diff --git a/+forbes/+functions/@IndPos/IndPos.m b/+forbes/+functions/@IndPos/IndPos.m new file mode 100644 index 0000000..0b10ccf --- /dev/null +++ b/+forbes/+functions/@IndPos/IndPos.m @@ -0,0 +1,13 @@ +% INDPOS Indicator function of the nonnegative orthant + +classdef IndPos < forbes.functions.Proximable + properties + lo % lower boundary + end + methods + function obj = IndPos(lo) + if nargin < 1, lo = 0.0; end + obj.lo = lo; + end + end +end diff --git a/+forbes/+functions/@IndPos/compute_prox.m b/+forbes/+functions/@IndPos/compute_prox.m new file mode 100644 index 0000000..0c60d2d --- /dev/null +++ b/+forbes/+functions/@IndPos/compute_prox.m @@ -0,0 +1,4 @@ +function [p, v] = compute_prox(obj, x, gam) + p = max(obj.lo, x); + v = 0; +end diff --git a/+forbes/+functions/@IndRankBall/IndRankBall.m b/+forbes/+functions/@IndRankBall/IndRankBall.m new file mode 100644 index 0000000..f9d9fc6 --- /dev/null +++ b/+forbes/+functions/@IndRankBall/IndRankBall.m @@ -0,0 +1,24 @@ +% INDRANKBALL Indicator function of the set of matrices with rank at most r + +classdef IndRankBall < forbes.functions.Proximable + properties + r % ball radius + method % indicates which svd to use + end + methods + function obj = IndRankBall(r, method) + if nargin < 2 + method = 'svds'; + end + switch method + case 'svds' + obj.method = 1; + case 'lansvd' + obj.method = 2; + otherwise + error('unknown method'); + end + obj.r = r; + end + end +end diff --git a/+forbes/+functions/@IndRankBall/compute_prox.m b/+forbes/+functions/@IndRankBall/compute_prox.m new file mode 100644 index 0000000..4f2943d --- /dev/null +++ b/+forbes/+functions/@IndRankBall/compute_prox.m @@ -0,0 +1,10 @@ +function [p, v] = compute_prox(obj, x, ~) + if obj.method == 1 % using svds + 
[U, S, V] = svds(x, obj.r, 'largest'); + p = U*(S*V'); + elseif obj.method == 2 % using lansvd + [U, S, V] = lansvd(x, obj.r, 'L'); + p = U*(S*V'); + end + v = 0; +end diff --git a/+forbes/+functions/@IndSparseSphereL2/IndSparseSphereL2.m b/+forbes/+functions/@IndSparseSphereL2/IndSparseSphereL2.m new file mode 100644 index 0000000..95eaaf1 --- /dev/null +++ b/+forbes/+functions/@IndSparseSphereL2/IndSparseSphereL2.m @@ -0,0 +1,18 @@ +% INDSPARSESPHEREL2 Indicator function of the L2 (Euclidean) sphere +% intersected with the L0 pseudo-norm ball of a given radius + +classdef IndSparseSphereL2 < forbes.functions.Proximable + properties + N % L0 ball radius + R % L2 sphere radius + end + methods + function obj = IndSparseSphereL2(N, R) + if nargin < 2 + R = 1.0; + end + obj.N = N; + obj.R = R; + end + end +end diff --git a/+forbes/+functions/@IndSparseSphereL2/compute_prox.m b/+forbes/+functions/@IndSparseSphereL2/compute_prox.m new file mode 100644 index 0000000..b591cd7 --- /dev/null +++ b/+forbes/+functions/@IndSparseSphereL2/compute_prox.m @@ -0,0 +1,8 @@ +function [p, v] = compute_prox(obj, x, ~) + p = x; + [~, I] = sort(abs(p), 'descend'); + p(I(obj.N+1:end)) = 0; + normp = norm(p); + p = obj.R*p/normp; + v = 0; +end diff --git a/+forbes/+functions/@IndSphereL2/IndSphereL2.m b/+forbes/+functions/@IndSphereL2/IndSphereL2.m new file mode 100644 index 0000000..dac5f76 --- /dev/null +++ b/+forbes/+functions/@IndSphereL2/IndSphereL2.m @@ -0,0 +1,15 @@ +% INDSPHEREL2 Indicator function of the L2 (Euclidean) sphere + +classdef IndSphereL2 < forbes.functions.Proximable + properties + R % ball radius + end + methods + function obj = IndSphereL2(R) + if nargin < 1 + R = 1.0; + end + obj.R = R; + end + end +end diff --git a/+forbes/+functions/@IndSphereL2/compute_prox.m b/+forbes/+functions/@IndSphereL2/compute_prox.m new file mode 100644 index 0000000..c751704 --- /dev/null +++ b/+forbes/+functions/@IndSphereL2/compute_prox.m @@ -0,0 +1,5 @@ +function [p, v] = 
compute_prox(obj, x, gam) + normx = norm(x); + p = obj.R/normx * x; + v = 0.0; +end diff --git a/+forbes/+functions/@IndStiefelManifold/IndStiefelManifold.m b/+forbes/+functions/@IndStiefelManifold/IndStiefelManifold.m new file mode 100644 index 0000000..aa278ce --- /dev/null +++ b/+forbes/+functions/@IndStiefelManifold/IndStiefelManifold.m @@ -0,0 +1,8 @@ +% INDSTIEFELMANIFOLD Indicator function of the Stiefel manifold + +classdef IndStiefelManifold < forbes.functions.Proximable + methods + function obj = IndStiefelManifold() + end + end +end \ No newline at end of file diff --git a/+forbes/+functions/@IndStiefelManifold/compute_prox.m b/+forbes/+functions/@IndStiefelManifold/compute_prox.m new file mode 100644 index 0000000..6376b51 --- /dev/null +++ b/+forbes/+functions/@IndStiefelManifold/compute_prox.m @@ -0,0 +1,8 @@ +function [p, v] = compute_prox(obj, x, gam) + if size(x, 1) < size(x, 2) + error('argument must be a tall matrix'); + end + [U, ~, V] = svd(x, 'econ'); + p = U*V'; + v = 0; +end diff --git a/+forbes/+functions/@LQRCost/LQRCost.m b/+forbes/+functions/@LQRCost/LQRCost.m new file mode 100644 index 0000000..a9f96ae --- /dev/null +++ b/+forbes/+functions/@LQRCost/LQRCost.m @@ -0,0 +1,64 @@ +% LQRCOST LQR cost function + +classdef LQRCost < forbes.functions.Proximable + properties + x0, Q, R, Q_f, A, B, N, QR + LRs, Ss, Ks, Ms, Ls + tilt, diff + end + methods + function obj = LQRCost(x0, Q, R, Q_f, A, B, N, xref) + obj.x0 = x0; + obj.Q = Q; + obj.R = R; + obj.Q_f = Q_f; + obj.A = A; + obj.B = B; + obj.N = N; + obj.QR = blkdiag(obj.Q, obj.R); + if nargin > 7 + obj.tilt = [repmat([obj.Q*xref; zeros(size(obj.R, 1), 1)], obj.N, 1); obj.Q_f*xref]; + obj.diff = (obj.N+1)/2*norm(xref)^2; + else + obj.tilt = 0; + obj.diff = 0; + end + obj.RiccatiFactor(); + end + function RiccatiFactor(obj) + n = size(obj.Q,1); + m = size(obj.R,1); + Ps = zeros(n, n, obj.N+1); + Ps(:,:,obj.N+1) = obj.Q_f; + obj.LRs = zeros(m, m, obj.N); + obj.Ss = zeros(m, n, obj.N); + obj.Ks 
= zeros(m, n, obj.N); + obj.Ms = zeros(m, n, obj.N); + obj.Ls = zeros(n, n, obj.N); + for k = obj.N:-1:1 + Rbar = obj.R + obj.B'*(Ps(:,:,k+1)*obj.B); + Rbar = (Rbar+Rbar')/2; + LR = chol(Rbar, 'lower'); + obj.LRs(:,:,k) = LR; + obj.Ss(:,:,k) = obj.B'*(Ps(:,:,k+1)*obj.A); + obj.Ks(:,:,k) = -(LR'\(LR\obj.Ss(:,:,k))); + Ps(:,:,k) = obj.Q + obj.A'*(Ps(:,:,k+1)*obj.A) + obj.Ss(:,:,k)'*obj.Ks(:,:,k); + Ps(:,:,k) = (Ps(:,:,k) + Ps(:,:,k)')/2; + end + for k = 1:obj.N + LR = obj.LRs(:,:,k); + obj.Ms(:,:,k) = -(LR'\(LR\obj.B')); + obj.Ls(:,:,k) = (obj.A + obj.B*obj.Ks(:,:,k))'; + end + end + function set_x0(obj, x0) + obj.x0 = x0; + end + function p = is_strongly_convex(obj) + p = true; + end + function p = is_generalized_quadratic(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@LQRCost/compute_gradient_conjugate.m b/+forbes/+functions/@LQRCost/compute_gradient_conjugate.m new file mode 100644 index 0000000..44d64d3 --- /dev/null +++ b/+forbes/+functions/@LQRCost/compute_gradient_conjugate.m @@ -0,0 +1,19 @@ +function [xu, fcw] = compute_gradient_conjugate(obj, w) + [n_x, n_u] = size(obj.B); + [~, xu] = forbes.utils.RiccatiSolve(w+obj.tilt, obj.x0, obj.A, obj.B, obj.LRs, obj.Ks, obj.Ms, obj.Ls, int32(n_x), int32(n_u), int32(obj.N)); + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Less efficient +% fxu = 0; +% for i=0:obj.N-1 +% x_i = xu(i*n_xu+1:i*n_xu+n_x); +% u_i = xu(i*n_xu+n_x+1:(i+1)*n_xu); +% fxu = fxu + 0.5*(x_i'*(obj.Q*x_i) + u_i'*(obj.R*u_i)); +% end + % More efficient + XU_stage = reshape(xu(1:end-n_x), n_x + n_u, obj.N); + fxu = 0.5*sum(sum(XU_stage.*(obj.QR*XU_stage))); + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + x_N = xu(obj.N*(n_x+n_u)+1:end); + fxu = fxu + 0.5*(x_N'*(obj.Q_f*x_N)); + fcw = (w+obj.tilt)'*xu - fxu - obj.diff; +end diff --git a/+forbes/+functions/@LeastSquares/LeastSquares.m b/+forbes/+functions/@LeastSquares/LeastSquares.m new file mode 100644 index 0000000..acb3ad7 --- /dev/null +++ 
b/+forbes/+functions/@LeastSquares/LeastSquares.m @@ -0,0 +1,41 @@ +% LEASTSQUARES Construct a linear least-squares cost function + +classdef LeastSquares < forbes.functions.Proximable + properties + A, b, lam + Atb + S % to store square matrix, i.e., A'A or AA' depending on size + L_prox % to store Cholesky factor for prox computation + gam_prox + flag_sparse + time_fact + end + methods + function obj = LeastSquares(A, b, lam) + if nargin < 2, b = zeros(size(A, 1), 1); end % a scalar 0 would turn A'*b into a zero matrix instead of a vector + if nargin < 3, lam = 1.0; end + obj.A = A; + obj.b = b; + obj.lam = lam; + obj.Atb = A'*b; + if issparse(A) + obj.flag_sparse = true; + else + obj.flag_sparse = false; + if size(A, 1) <= size(A, 2) + obj.S = A*A'; + else + obj.S = A'*A; + end + end + obj.gam_prox = 0; + obj.time_fact = 0; + end + function p = is_convex(obj) + p = true; + end + function p = is_quadratic(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@LeastSquares/compute_gradient.m b/+forbes/+functions/@LeastSquares/compute_gradient.m new file mode 100644 index 0000000..8ff1a7d --- /dev/null +++ b/+forbes/+functions/@LeastSquares/compute_gradient.m @@ -0,0 +1,5 @@ +function [g, v] = compute_gradient(obj, x) + res = obj.A*x - obj.b; + g = obj.lam*(obj.A'*res); % gradient of the lam-scaled value v computed below + v = 0.5*obj.lam*norm(res)^2; +end diff --git a/+forbes/+functions/@LeastSquares/compute_prox.m b/+forbes/+functions/@LeastSquares/compute_prox.m new file mode 100644 index 0000000..d5b2fb8 --- /dev/null +++ b/+forbes/+functions/@LeastSquares/compute_prox.m @@ -0,0 +1,35 @@ +function [p, v] = compute_prox(obj, x, gam) + lamgam = obj.lam*gam; + if gam ~= obj.gam_prox % more robust test? 
+ % factor matrix when gam changes (or at first call) + obj.gam_prox = gam; + I = speye(size(obj.S)); + t0 = tic(); + if obj.flag_sparse + if size(obj.A, 1) <= size(obj.A, 2) + obj.L_prox = ldlchol(obj.A, 1/lamgam); + else + obj.L_prox = ldlchol(obj.A', 1/lamgam); + end + else + obj.L_prox = chol(obj.S + I/lamgam); + end + obj.time_fact = toc(t0); + end + q = obj.Atb + x/lamgam; + if size(obj.A, 1) <= size(obj.A, 2) + if obj.flag_sparse + s = ldlsolve(obj.L_prox, obj.A*q); + else + s = obj.L_prox\(obj.L_prox'\(obj.A*q)); + end + p = lamgam*(q - obj.A'*s); + else + if obj.flag_sparse + p = ldlsolve(obj.L_prox, q); + else + p = obj.L_prox\(obj.L_prox'\q); + end + end + v = (obj.lam/2)*norm(obj.A*p-obj.b, 2)^2; +end diff --git a/+forbes/+functions/@LogisticLoss/LogisticLoss.m b/+forbes/+functions/@LogisticLoss/LogisticLoss.m new file mode 100644 index 0000000..adae313 --- /dev/null +++ b/+forbes/+functions/@LogisticLoss/LogisticLoss.m @@ -0,0 +1,22 @@ +% LOGISTICLOSS Logistic loss function + +classdef LogisticLoss < forbes.functions.Proximable + properties + mu % coefficient + end + methods + function obj = LogisticLoss(mu) + if nargin < 1, mu = 1; end + obj.mu = mu; + end + function p = is_convex(obj) + p = true; + end + function p = is_smooth(obj) + p = true; + end + function p = has_hessian(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@LogisticLoss/compute_gradient.m b/+forbes/+functions/@LogisticLoss/compute_gradient.m new file mode 100644 index 0000000..081c39e --- /dev/null +++ b/+forbes/+functions/@LogisticLoss/compute_gradient.m @@ -0,0 +1,13 @@ +function [grad, val, hess] = compute_gradient(obj, x) + emx = exp(-x); + invpx = (1+emx); + px = 1./invpx; + grad = (px-1)*obj.mu; % grad is the first output, so it must be assigned on every call + if nargout >= 2 + val = sum(log(invpx))*obj.mu; % function value, only when requested + end + if nargout >= 3 + h = px.*(1-px); + hess = obj.mu*diag(sparse(h)); + end +end diff --git a/+forbes/+functions/@NormL0/NormL0.m b/+forbes/+functions/@NormL0/NormL0.m new file mode 100644 index 0000000..2112a4d 
--- /dev/null +++ b/+forbes/+functions/@NormL0/NormL0.m @@ -0,0 +1,13 @@ +% NORML0 L0 pseudo-norm + +classdef NormL0 < forbes.functions.Proximable + properties + lam % coefficient + end + methods + function obj = NormL0(lam) + if nargin < 1, lam = 1.0; end + obj.lam = lam; + end + end +end diff --git a/+forbes/+functions/@NormL0/compute_prox.m b/+forbes/+functions/@NormL0/compute_prox.m new file mode 100644 index 0000000..eabe423 --- /dev/null +++ b/+forbes/+functions/@NormL0/compute_prox.m @@ -0,0 +1,5 @@ +function [p, v] = compute_prox(obj, x, gam) + over = abs(x) > sqrt(2*gam*obj.lam); + p = x.*over; + v = obj.lam*nnz(p); +end \ No newline at end of file diff --git a/+forbes/+functions/@NormL1/NormL1.m b/+forbes/+functions/@NormL1/NormL1.m new file mode 100644 index 0000000..3a85bd3 --- /dev/null +++ b/+forbes/+functions/@NormL1/NormL1.m @@ -0,0 +1,16 @@ +% NORML1 L1 norm + +classdef NormL1 < forbes.functions.Proximable + properties + lam % coefficient + end + methods + function obj = NormL1(lam) + if nargin < 1, lam = 1.0; end + obj.lam = lam; + end + function p = is_convex(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@NormL1/compute_prox.m b/+forbes/+functions/@NormL1/compute_prox.m new file mode 100644 index 0000000..1d20dd5 --- /dev/null +++ b/+forbes/+functions/@NormL1/compute_prox.m @@ -0,0 +1,5 @@ +function [p, v] = compute_prox(obj, x, gam) + uz = max(0.0, abs(x) - obj.lam*gam); + p = sign(x).*uz; + v = obj.lam*sum(uz(:)); +end diff --git a/+forbes/+functions/@NormL2/NormL2.m b/+forbes/+functions/@NormL2/NormL2.m new file mode 100644 index 0000000..25e1201 --- /dev/null +++ b/+forbes/+functions/@NormL2/NormL2.m @@ -0,0 +1,16 @@ +% NORML2 L2 (Euclidean) norm + +classdef NormL2 < forbes.functions.Proximable + properties + lam % coefficient + end + methods + function obj = NormL2(lam) + if nargin < 1, lam = 1.0; end + obj.lam = lam; + end + function p = is_convex(obj) + p = true; + end + end +end diff --git 
a/+forbes/+functions/@NormL2/compute_prox.m b/+forbes/+functions/@NormL2/compute_prox.m new file mode 100644 index 0000000..5390134 --- /dev/null +++ b/+forbes/+functions/@NormL2/compute_prox.m @@ -0,0 +1,12 @@ +function [p, v] = compute_prox(obj, x, gam) + normx = norm(x, 'fro'); + lamgam = obj.lam*gam; + if normx <= lamgam + p = zeros(size(x)); + v = 0; + else + scal = (1-lamgam/normx); + p = (1-lamgam/normx)*x; + v = obj.lam*scal*normx; + end +end \ No newline at end of file diff --git a/+forbes/+functions/@NormLhalf/NormLhalf.m b/+forbes/+functions/@NormLhalf/NormLhalf.m new file mode 100644 index 0000000..e333515 --- /dev/null +++ b/+forbes/+functions/@NormLhalf/NormLhalf.m @@ -0,0 +1,13 @@ +% NORMLHALF L(1/2) pseudo-norm + +classdef NormLhalf < forbes.functions.Proximable + properties + lam % coefficient + end + methods + function obj = NormLhalf(lam) + if nargin < 1, lam = 1.0; end + obj.lam = lam; + end + end +end diff --git a/+forbes/+functions/@NormLhalf/compute_prox.m b/+forbes/+functions/@NormLhalf/compute_prox.m new file mode 100644 index 0000000..f84e192 --- /dev/null +++ b/+forbes/+functions/@NormLhalf/compute_prox.m @@ -0,0 +1,13 @@ +function [p, v] = compute_prox(obj, x, gam) +% The implementation of the proximal mappings is based on: +% Cao, Sun, Xu, "Fast image deconvolution using closed-form +% thresholding formulas of L_q (q=1/2,2/3) regularization" (2013) + mu = 2*gam*obj.lam; + q = nthroot(54, 3)/4*nthroot(mu, 3)^2; + absx = abs(x); + phi = acos((mu/8)*sqrt((absx/3).^(-3))); + ind0 = (absx <= q); + p(ind0, 1) = 0; + p(~ind0, 1) = (2/3)*(sign(x(~ind0)).*absx(~ind0).*(1+cos((2/3)*(pi-phi(~ind0))))); + v = obj.lam*sum(abs(p).^(0.5)); +end diff --git a/+forbes/+functions/@NuclearNorm/NuclearNorm.m b/+forbes/+functions/@NuclearNorm/NuclearNorm.m new file mode 100644 index 0000000..d315942 --- /dev/null +++ b/+forbes/+functions/@NuclearNorm/NuclearNorm.m @@ -0,0 +1,17 @@ +% NUCLEARNORM Nuclear norm function + +classdef NuclearNorm < 
forbes.functions.Proximable + properties + lam, mode, method, nsv + end + methods + function obj = NuclearNorm(lam, mode, method) + if nargin < 2, mode = 'exact'; end + if nargin < 3, method = 'svds'; end + obj.lam = lam; + obj.mode = mode; + obj.method = method; + obj.nsv = 10; + end + end +end diff --git a/+forbes/+functions/@NuclearNorm/compute_prox.m b/+forbes/+functions/@NuclearNorm/compute_prox.m new file mode 100644 index 0000000..16b4a15 --- /dev/null +++ b/+forbes/+functions/@NuclearNorm/compute_prox.m @@ -0,0 +1,28 @@ +function [y, v] = compute_prox(obj, x, gam) + switch obj.mode + case 'exact' % exact prox + [U, S, V] = svd(x, 'econ'); + diagS1 = max(0, diag(S)-obj.lam*gam); + S1 = diag(sparse(diagS1)); + y = U*(S1*V'); + v = obj.lam*sum(diagS1); + case 'inexact' + [m, n] = size(x); + maxrank = min(m, n); + [U, S, V] = svds(x, obj.nsv, 'L'); + diagS1 = max(0, diag(S)-obj.lam*gam); + if nnz(diagS1) == length(diagS1) + obj.nsv = min(maxrank, obj.nsv+5); + else + obj.nsv = nnz(diagS1)+1; + end + S1 = diag(sparse(diagS1)); + y = U*(S1*V'); + if nargout >= 2 + v = obj.lam*sum(diagS1); + end + otherwise + % TODO: implement these + error('not supported'); + end +end diff --git a/+forbes/+functions/@Precompose/Precompose.m b/+forbes/+functions/@Precompose/Precompose.m new file mode 100644 index 0000000..431a9c2 --- /dev/null +++ b/+forbes/+functions/@Precompose/Precompose.m @@ -0,0 +1,23 @@ +% PRECOMPOSE Precomposition with a linear operator +% +% PRECOMPOSE(f, L, mu) returns the function f(L*x), where L is a linear +% operator such that L*L' = mu*I, for mu > 0. + +classdef Precompose < forbes.functions.Proximable + properties + f, L, mu + end + methods + function obj = Precompose(f, L, mu) +% Here L is a Gram-diagonal operator +% i.e. 
such that L*L' = mu*I, mu > 0 + if isscalar(L) + if nargin > 2, warning('when L is scalar, mu = L^2; ignoring third argument'); end + mu = L^2; % always defined for scalar L, also when the third argument is omitted + end + obj.f = f; + obj.L = L; + obj.mu = mu; + end + end +end diff --git a/+forbes/+functions/@Precompose/compute_prox.m b/+forbes/+functions/@Precompose/compute_prox.m new file mode 100644 index 0000000..0821498 --- /dev/null +++ b/+forbes/+functions/@Precompose/compute_prox.m @@ -0,0 +1,8 @@ +function [p, v] = compute_prox(obj, x, gam) +% See Prop. 23.32 in Bauschke, Combettes +% "Convex Analysis and Monotone Operator Theory in Hilbert Spaces", +% 1st ed, 2011 + Lx = obj.L*x; + [p1, v] = obj.f.compute_prox(Lx, obj.mu*gam); % inner prox uses parameter mu*gam, since L*L' = mu*I + p = x + obj.L'*(p1 - Lx)/obj.mu; +end diff --git a/+forbes/+functions/@Proximable/Proximable.m b/+forbes/+functions/@Proximable/Proximable.m new file mode 100644 index 0000000..134b1a1 --- /dev/null +++ b/+forbes/+functions/@Proximable/Proximable.m @@ -0,0 +1,76 @@ +classdef Proximable < handle + properties + cnt_prox + cnt_gradient + end + methods + function obj = Proximable() + obj.cnt_prox = 0; + obj.cnt_gradient = 0; + end + function reset(obj) + obj.cnt_prox = 0; + obj.cnt_gradient = 0; + end + function v = call(obj, x) + [~, v] = obj.gradient(x); + end + function [p, v] = prox(obj, x, gamma) + [p, v] = obj.compute_prox(x, gamma); + obj.cnt_prox = obj.cnt_prox + 1; + end + function varargout = gradient(obj, x) + [varargout{1:max(nargout, 2)}] = obj.compute_gradient(x); % gradient and value as before; forwards a 3rd output (Hessian) when requested + obj.cnt_gradient = obj.cnt_gradient + 1; + end + function p = is_prox_accurate(obj) + p = true; + end + function p = is_separable(obj) + p = false; + end + function p = is_convex(obj) + p = false; + end + function p = is_singleton(obj) + p = false; + end + function p = is_cone(obj) + p = false; + end + function p = is_affine(obj) + p = obj.is_singleton(); + end + function p = is_set(obj) + p = obj.is_affine() || obj.is_cone(); + end + function p = is_smooth(obj) + p = obj.is_quadratic(); + end + function p = is_quadratic(obj) + p = false; + end + function p = 
is_generalized_quadratic(obj) + p = obj.is_quadratic() || obj.is_affine(); + end + function p = is_strongly_convex(obj) + p = false; + end + function p = has_hessian(obj) + p = false; + end + function p = is_null(obj) + p = false; + end + end + methods (Static) + function mu = get_gram_diagonal(M) + mus = M*(M'*ones(size(M, 1), 1)); + if (max(mus)-min(mus))/(1+abs(min(mus))) > 1e-14 + mu = 0; + else + mu = 0.5*(max(mus)+min(mus)); + end + end + end +end diff --git a/+forbes/+functions/@Quadratic/Quadratic.m b/+forbes/+functions/@Quadratic/Quadratic.m new file mode 100644 index 0000000..3cd4b7f --- /dev/null +++ b/+forbes/+functions/@Quadratic/Quadratic.m @@ -0,0 +1,41 @@ +% QUADRATIC Quadratic function + +classdef Quadratic < forbes.functions.Proximable + properties + Q, q % Hessian and linear term + L_prox % stores Cholesky factor for prox + gam_prox % stores most recently used parameter for prox + L_conj + p_conj + flag_sparse + flag_large + end + methods + function obj = Quadratic(Q, q) + if issparse(Q) + obj.flag_sparse = true; + else + obj.flag_sparse = false; + end + if size(Q, 2) > 5000 + obj.flag_large = true; + else + obj.flag_large = false; + end + obj.Q = Q; + obj.q = q; + obj.gam_prox = 0; + end + function p = is_quadratic(obj) + p = true; + end + function p = is_convex(obj) + % TODO implement this for real + p = true; + end + function p = is_strongly_convex(obj) + % TODO implement this for real + p = true; + end + end +end diff --git a/+forbes/+functions/@Quadratic/compute_gradient.m b/+forbes/+functions/@Quadratic/compute_gradient.m new file mode 100644 index 0000000..0450f5d --- /dev/null +++ b/+forbes/+functions/@Quadratic/compute_gradient.m @@ -0,0 +1,5 @@ +function [g, v, Q] = compute_gradient(obj, x) + g = obj.Q*x+obj.q; + v = 0.5*(g+obj.q)'*x; + Q = obj.Q; +end diff --git a/+forbes/+functions/@Quadratic/compute_gradient_conjugate.m b/+forbes/+functions/@Quadratic/compute_gradient_conjugate.m new file mode 100644 index 0000000..52cfbc0 --- 
/dev/null +++ b/+forbes/+functions/@Quadratic/compute_gradient_conjugate.m @@ -0,0 +1,27 @@ +function [g, v] = compute_gradient_conjugate(obj, y) + if isempty(obj.L_conj) + factor_gradient_conjugate(obj); + end + if obj.flag_sparse + rhs = y-obj.q; + g(obj.p_conj,1) = obj.L_conj'\(obj.L_conj\rhs(obj.p_conj)); + v = 0.5*(y-obj.q)'*g; + else + g = obj.L_conj'\(obj.L_conj\(y-obj.q)); + v = 0.5*(y-obj.q)'*g; + end +end + +function factor_gradient_conjugate(obj) + if obj.flag_sparse + [obj.L_conj,flag,obj.p_conj] = chol(obj.Q,'lower','vector'); + if flag~=0 + error('Q is not positive definite') + end + else + [obj.L_conj,flag] = chol(obj.Q,'lower'); + if flag~=0 + error('Q is not positive definite') + end + end +end diff --git a/+forbes/+functions/@Quadratic/compute_prox.m b/+forbes/+functions/@Quadratic/compute_prox.m new file mode 100644 index 0000000..11827c0 --- /dev/null +++ b/+forbes/+functions/@Quadratic/compute_prox.m @@ -0,0 +1,23 @@ +function [p, v] = compute_prox(obj, x, gam) + n = length(x); + I = speye(n); + if obj.flag_large + [p, flag] = pcg(I + gam*obj.Q, x - gam*obj.q); + else + if gam ~= obj.gam_prox % more robust test? + % factor matrix when gam changes (or at first call) + obj.gam_prox = gam; + if obj.flag_sparse + obj.L_prox = ldlchol(I + gam*obj.Q); + else + obj.L_prox = chol(I + gam*obj.Q); % do differently for sparse? + end + end + if obj.flag_sparse + p = ldlsolve(obj.L_prox, x - gam*obj.q); + else + p = obj.L_prox\(obj.L_prox'\(x - gam*obj.q)); + end + end + v = 0.5*(p'*(obj.Q*p)) + obj.q'*p; % can we save something here? +end diff --git a/+forbes/+functions/@ScaleTranslate/ScaleTranslate.m b/+forbes/+functions/@ScaleTranslate/ScaleTranslate.m new file mode 100644 index 0000000..57cbed1 --- /dev/null +++ b/+forbes/+functions/@ScaleTranslate/ScaleTranslate.m @@ -0,0 +1,51 @@ +% SCALETRANSLATE Scales and translates a function input +% +% SCALETRANSLATE(f, a, b) returns the function f(a.*x - b). 
+% +% Only scalar a (i.e., uniform scaling) is currently available. + +classdef ScaleTranslate < forbes.functions.Proximable + properties + f, a, b + end + methods + function obj = ScaleTranslate(f, a, b) + if nargin < 2, a = 1.0; end % a is the second argument: default it when only f is given + if nargin < 3, b = 0.0; end % b is the third argument + if ~isscalar(a) + error('only uniform scaling is currently available; a should be a scalar'); + end + % TODO: check that a ~= 0.0 + obj.f = f; + obj.a = a; + obj.b = b; % note: compute_gradient and compute_prox evaluate f at a*x + b + end + function p = is_generalized_quadratic(obj) + p = obj.f.is_generalized_quadratic(); + end + function p = is_quadratic(obj) + p = obj.f.is_quadratic(); + end + function p = is_smooth(obj) + p = obj.f.is_smooth(); + end + function p = is_convex(obj) + p = obj.f.is_convex(); + end + function p = is_strongly_convex(obj) + p = obj.f.is_strongly_convex(); + end + function p = is_singleton(obj) + p = obj.f.is_singleton(); + end + function p = is_cone(obj) + p = obj.f.is_cone(); + end + function p = is_affine(obj) + p = obj.f.is_affine(); + end + function p = is_set(obj) + p = obj.f.is_set(); + end + end +end diff --git a/+forbes/+functions/@ScaleTranslate/compute_gradient.m b/+forbes/+functions/@ScaleTranslate/compute_gradient.m new file mode 100644 index 0000000..73f6518 --- /dev/null +++ b/+forbes/+functions/@ScaleTranslate/compute_gradient.m @@ -0,0 +1,4 @@ +function [grad, val] = compute_gradient(obj, x) + [grad1, val] = obj.f.compute_gradient(obj.a*x + obj.b); + grad = obj.a*grad1; +end diff --git a/+forbes/+functions/@ScaleTranslate/compute_prox.m b/+forbes/+functions/@ScaleTranslate/compute_prox.m new file mode 100644 index 0000000..545a160 --- /dev/null +++ b/+forbes/+functions/@ScaleTranslate/compute_prox.m @@ -0,0 +1,5 @@ +function [p, v] = compute_prox(obj, x, gam) + a2 = obj.a^2; + [p1, v] = obj.f.compute_prox(obj.a*x + obj.b, a2*gam); + p = (p1 - obj.b)/obj.a; +end diff --git a/+forbes/+functions/@SeparableSum/SeparableSum.m b/+forbes/+functions/@SeparableSum/SeparableSum.m new file mode 100644 index 0000000..c8e5da9 --- 
/dev/null +++ b/+forbes/+functions/@SeparableSum/SeparableSum.m @@ -0,0 +1,75 @@ +% SEPARABLESUM Separable sum of functions + +classdef SeparableSum < forbes.functions.Proximable + properties + fs + dims + idx + dimsum + end + methods + function obj = SeparableSum(fs, dims, idx) + l = length(fs); + if nargin < 3 + idx = 1:l; + end + for i = 1:length(dims) + if numel(dims{i}) == 1, dims{i} = [dims{i}, 1]; end + end + dimsum = zeros(length(idx), 1); + dimsum(1) = prod(dims{1}); + for i = 2:length(idx) + dimsum(i) = dimsum(i-1) + prod(dims{i}); + end + obj.fs = fs; + obj.dims = dims; + obj.idx = idx; + obj.dimsum = dimsum; + end + function p = is_quadratic(obj) + p = true; + for i = 1:length(obj.fs) + if ~obj.fs{i}.is_quadratic() + p = false; + break; + end + end + end + function p = is_convex(obj) + p = true; + for i = 1:length(obj.fs) + if ~obj.fs{i}.is_convex() + p = false; + break; + end + end + end + function p = is_strongly_convex(obj) + p = true; + for i = 1:length(obj.fs) + if ~obj.fs{i}.is_strongly_convex() + p = false; + break; + end + end + end + function p = is_generalized_quadratic(obj) + p = true; + for i = 1:length(obj.fs) + if ~obj.fs{i}.is_generalized_quadratic() + p = false; + break; + end + end + end + function p = is_smooth(obj) + p = true; + for i = 1:length(obj.fs) + if ~obj.fs{i}.is_smooth() + p = false; + break; + end + end + end + end +end diff --git a/+forbes/+functions/@SeparableSum/compute_gradient.m b/+forbes/+functions/@SeparableSum/compute_gradient.m new file mode 100644 index 0000000..6ab374d --- /dev/null +++ b/+forbes/+functions/@SeparableSum/compute_gradient.m @@ -0,0 +1,12 @@ +function [g, v] = compute_gradient(obj, x) + g = zeros(obj.dimsum(end), 1); + v = 0; + baseidx = 1; + for i = 1:length(obj.idx) + xcurr = reshape(x(baseidx:obj.dimsum(i)), obj.dims{i}); + [g1, v1] = obj.fs{obj.idx(i)}.compute_gradient(xcurr); + g(baseidx:obj.dimsum(i)) = g1; + v = v + v1; + baseidx = obj.dimsum(i)+1; + end +end diff --git 
a/+forbes/+functions/@SeparableSum/compute_prox.m b/+forbes/+functions/@SeparableSum/compute_prox.m new file mode 100644 index 0000000..0315b90 --- /dev/null +++ b/+forbes/+functions/@SeparableSum/compute_prox.m @@ -0,0 +1,12 @@ +function [p, v] = compute_prox(obj, x, gam) + p = zeros(obj.dimsum(end), 1); + v = 0; + baseidx = 1; + for i = 1:length(obj.idx) + xcurr = reshape(x(baseidx:obj.dimsum(i)), obj.dims{i}); + [p1, v1] = obj.fs{obj.idx(i)}.compute_prox(xcurr, gam); + p(baseidx:obj.dimsum(i)) = p1; + v = v + v1; + baseidx = obj.dimsum(i)+1; + end +end diff --git a/+forbes/+functions/@SeparableSum/test.m b/+forbes/+functions/@SeparableSum/test.m new file mode 100644 index 0000000..1085302 --- /dev/null +++ b/+forbes/+functions/@SeparableSum/test.m @@ -0,0 +1,16 @@ +function test(obj) + gam = 0.5 + rand(); + x = []; + y = []; + v = 0; + for i = 1:length(obj.idx) + x1 = randn(obj.dims{i}); % dims{i} is stored as a size vector by the constructor + x = [x; x1(:)]; % stack blocks as columns, matching the layout compute_prox reshapes from + [y1, v1] = obj.fs{obj.idx(i)}.prox(x1, gam); + y = [y; y1(:)]; + v = v + v1; + end + [y_test, v_test] = obj.prox(x, gam); + assert(norm(y_test - y)/(1+norm(y)) <= 1e-12); + assert(abs(v_test - v)/(1+abs(v)) <= 1e-12); +end \ No newline at end of file diff --git a/+forbes/+functions/@SqrDistL2/SqrDistL2.m b/+forbes/+functions/@SqrDistL2/SqrDistL2.m new file mode 100644 index 0000000..f1c7f14 --- /dev/null +++ b/+forbes/+functions/@SqrDistL2/SqrDistL2.m @@ -0,0 +1,21 @@ +% SQRDISTL2 Squared L2 (Euclidean) distance from a convex set + +classdef SqrDistL2 < forbes.functions.Proximable + properties + ind % indicator function of the (convex) set + lam + end + methods + function obj = SqrDistL2(ind, lam) + if nargin < 2, lam = 1.0; end + obj.ind = ind; + obj.lam = lam; + end + function p = is_convex(obj) + p = true; + end + function p = is_smooth(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@SqrDistL2/compute_prox.m b/+forbes/+functions/@SqrDistL2/compute_prox.m new file mode 100644 index 0000000..cb82423 --- /dev/null +++ 
b/+forbes/+functions/@SqrDistL2/compute_prox.m @@ -0,0 +1,6 @@ +function [p, v] = compute_prox(obj, x, gam) + proj_x = obj.ind.compute_prox(x, 1.0); + lamgam = obj.lam * gam; % use this function's own coefficient, not a field of the indicator + p = (x+lamgam*proj_x)/(1+lamgam); + v = 0.5*obj.lam*norm(p-proj_x, 'fro')^2; % p lies between x and proj_x, so for a convex set proj_x is also the projection of p +end diff --git a/+forbes/+functions/@SqrNormL2/SqrNormL2.m b/+forbes/+functions/@SqrNormL2/SqrNormL2.m new file mode 100644 index 0000000..63fbae3 --- /dev/null +++ b/+forbes/+functions/@SqrNormL2/SqrNormL2.m @@ -0,0 +1,25 @@ +% SQRNORML2 Squared L2 (Euclidean) norm + +classdef SqrNormL2 < forbes.functions.Proximable + properties + w % weight(s) + end + methods + function obj = SqrNormL2(w) + if nargin < 1, w = 1; end + obj.w = w; + end + function p = is_convex(obj) + p = true; + end + function p = is_strongly_convex(obj) + p = all(obj.w > 0); + end + function p = is_quadratic(obj) + p = true; + end + function p = has_hessian(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@SqrNormL2/compute_gradient.m b/+forbes/+functions/@SqrNormL2/compute_gradient.m new file mode 100644 index 0000000..9035171 --- /dev/null +++ b/+forbes/+functions/@SqrNormL2/compute_gradient.m @@ -0,0 +1,7 @@ +function [g, v, H] = compute_gradient(obj, x) + g = obj.w .* x; + v = 0.5*(x(:)'*g(:)); + if nargout >= 3 + H = @(x) obj.w .* x; + end +end diff --git a/+forbes/+functions/@SqrNormL2/compute_gradient_conjugate.m b/+forbes/+functions/@SqrNormL2/compute_gradient_conjugate.m new file mode 100644 index 0000000..ce03d86 --- /dev/null +++ b/+forbes/+functions/@SqrNormL2/compute_gradient_conjugate.m @@ -0,0 +1,10 @@ +function [g, v, H] = compute_gradient_conjugate(obj, x) + if any(obj.w == 0) + error('the conjugate is not smooth'); + end + g = x ./ obj.w; + v = 0.5*(x(:)'*g(:)); + if nargout >= 3 + H = @(x) x ./ obj.w; % returned as third output, mirroring compute_gradient + end +end diff --git a/+forbes/+functions/@SqrNormL2/compute_prox.m b/+forbes/+functions/@SqrNormL2/compute_prox.m new file mode 100644 index 0000000..1535a14 --- /dev/null +++ 
b/+forbes/+functions/@SqrNormL2/compute_prox.m @@ -0,0 +1,5 @@ +function [prox, val] = compute_prox(obj, x, gam) + wgam = obj.w .* gam; + prox = x./(1+wgam); + val = ((obj.w .* prox(:))'*prox(:))/2; +end diff --git a/+forbes/+functions/@Zero/Zero.m b/+forbes/+functions/@Zero/Zero.m new file mode 100644 index 0000000..7eab6dc --- /dev/null +++ b/+forbes/+functions/@Zero/Zero.m @@ -0,0 +1,17 @@ +% ZERO Null function + +classdef Zero < forbes.functions.Proximable + methods + function obj = Zero() + end + function p = is_convex(obj) + p = true; + end + function p = is_quadratic(obj) + p = true; + end + function p = is_null(obj) + p = true; + end + end +end diff --git a/+forbes/+functions/@Zero/compute_gradient.m b/+forbes/+functions/@Zero/compute_gradient.m new file mode 100644 index 0000000..3098583 --- /dev/null +++ b/+forbes/+functions/@Zero/compute_gradient.m @@ -0,0 +1,4 @@ +function [g, v] = compute_gradient(obj, x) + g = zeros(size(x)); % gradient has the shape of x, so products such as C'*g stay vectors + v = 0.0; +end diff --git a/+forbes/+functions/@Zero/compute_prox.m b/+forbes/+functions/@Zero/compute_prox.m new file mode 100644 index 0000000..76b3a63 --- /dev/null +++ b/+forbes/+functions/@Zero/compute_prox.m @@ -0,0 +1,4 @@ +function [prox, val] = compute_prox(obj, x, gam) + prox = x; + val = 0.0; +end diff --git a/+forbes/+operators/@LBFGS/LBFGS.m b/+forbes/+operators/@LBFGS/LBFGS.m new file mode 100644 index 0000000..b051612 --- /dev/null +++ b/+forbes/+operators/@LBFGS/LBFGS.m @@ -0,0 +1,29 @@ +classdef LBFGS < handle +% LBFGS creates a limited-memory BFGS operator +% +% H = LBFGS(mem) constructs a limited-memory BFGS operator with memory mem. +% +% H.push(s, y) pushes a new pair of vectors (s, y) into the buffer. +% +% u = H*v applies H to vector v; use -(H*v) if the minus is needed. 
+ properties + mem % memory + S, Y, YS, H0, idx, currmem + cntskip + end + methods + function obj = LBFGS(mem) + obj.mem = mem; + obj.S = []; + obj.Y = []; + obj.YS = []; + obj.idx = 0; + obj.currmem = 0; + obj.cntskip = 0; + end + function reset(obj) + obj.idx = 0; + obj.currmem = 0; + end + end +end diff --git a/+forbes/+operators/@LBFGS/mtimes.m b/+forbes/+operators/@LBFGS/mtimes.m new file mode 100644 index 0000000..7974e1b --- /dev/null +++ b/+forbes/+operators/@LBFGS/mtimes.m @@ -0,0 +1,8 @@ +function u = mtimes(obj, v) + if obj.currmem > 0 + H = obj.YS(obj.idx)/(obj.Y(:,obj.idx)'*obj.Y(:,obj.idx)); + u = forbes.utils.lbfgs_mex(obj.S, obj.Y, obj.YS, H, v, int32(obj.idx), int32(obj.currmem)); + else + u = v; + end +end diff --git a/+forbes/+operators/@LBFGS/push.m b/+forbes/+operators/@LBFGS/push.m new file mode 100644 index 0000000..f279987 --- /dev/null +++ b/+forbes/+operators/@LBFGS/push.m @@ -0,0 +1,12 @@ +function push(obj, s, y) + ys = y'*s; + if ys > 0.0 + obj.idx = 1+mod(obj.idx, obj.mem); + obj.currmem = min(obj.currmem+1, obj.mem); + obj.S(:,obj.idx) = s; + obj.Y(:,obj.idx) = y; + obj.YS(obj.idx) = ys; + else + obj.cntskip = obj.cntskip + 1; + end +end diff --git a/@ProblemComposite/EstimateLipschitz.m b/+forbes/+problems/@ProblemComposite/EstimateLipschitz.m similarity index 74% rename from @ProblemComposite/EstimateLipschitz.m rename to +forbes/+problems/@ProblemComposite/EstimateLipschitz.m index 24a2d60..5c35930 100644 --- a/@ProblemComposite/EstimateLipschitz.m +++ b/+forbes/+problems/@ProblemComposite/EstimateLipschitz.m @@ -6,7 +6,7 @@ return; end -if prob.istheref1 && ~prob.istheref2 && prob.f1.isConvex +if prob.istheref1 && ~prob.istheref2 && prob.f1.is_convex() % if the smooth term is purely quadratic % then compute L 'exactly' eigsOpt.issym = 1; @@ -31,13 +31,13 @@ if prob.isthereC1 C1x0 = prob.C1*prob.x0; C1x1 = prob.C1*(prob.x0+delta); - [~, gradf1C1x0] = prob.callf1(C1x0); - [~, gradf1C1x1] = prob.callf1(C1x1); + [gradf1C1x0, ~] = 
prob.f1.gradient(C1x0); + [gradf1C1x1, ~] = prob.f1.gradient(C1x1); grad1x0 = prob.C1'*gradf1C1x0; grad1x1 = prob.C1'*gradf1C1x1; else - [~, grad1x0] = prob.callf1(prob.x0); - [~, grad1x1] = prob.callf1(prob.x0+delta); + [grad1x0, ~] = prob.f1.gradient(prob.x0); + [grad1x1, ~] = prob.f1.gradient(prob.x0+delta); end else grad1x0 = 0; @@ -48,13 +48,13 @@ if prob.isthereC2 C2x0 = prob.C2*prob.x0; C2x1 = prob.C2*(prob.x0+delta); - [~, gradf2C2x0] = prob.callf2(C2x0); - [~, gradf2C2x1] = prob.callf2(C2x1); + [gradf2C2x0, ~] = prob.f2.gradient(C2x0); + [gradf2C2x1, ~] = prob.f2.gradient(C2x1); grad2x0 = prob.C2'*gradf2C2x0; grad2x1 = prob.C2'*gradf2C2x1; else - [~, grad2x0] = prob.callf2(prob.x0); - [~, grad2x1] = prob.callf2(prob.x0+delta); + [grad2x0, ~] = prob.f2.gradient(prob.x0); + [grad2x1, ~] = prob.f2.gradient(prob.x0+delta); end else grad2x0 = 0; diff --git a/@ProblemComposite/Get_DualPoints.m b/+forbes/+problems/@ProblemComposite/Get_DualPoints.m similarity index 72% rename from @ProblemComposite/Get_DualPoints.m rename to +forbes/+problems/@ProblemComposite/Get_DualPoints.m index 9d4990f..5ebae44 100644 --- a/@ProblemComposite/Get_DualPoints.m +++ b/+forbes/+problems/@ProblemComposite/Get_DualPoints.m @@ -2,19 +2,19 @@ Ax = 0; if prob.istheref1 - [~, x1] = prob.callf1(prob.C1*y); + [x1, ~] = prob.f1.gradient(prob.C1*y); Ax = Ax - prob.C1'*x1; else x1 = []; end if prob.istheref2 - [~, x2] = prob.callf2(prob.C2*y); + [x2, ~] = prob.f2.gradient(prob.C2*y); Ax = Ax - prob.C2'*x2; else x2 = []; end w = -gam*prob.D*(prob.lin - Ax - y/gam); -u = -prob.D'*(prob.callg(w, prob.mu*gam) - w)/(prob.mu*gam); +u = -prob.D'*(prob.g.prox(w, prob.mu*gam) - w)/(prob.mu*gam); z = (prob.D*u)/prob.mu; %w + prob.D'*(proxg(prob.D*w, prob.mu/gam) - prob.D*w)/prob.mu; %-prob.D'*prob.callg((prob.D*w)/(prob.mu*gam), 1/(prob.mu*gam)); diff --git a/@ProblemComposite/Get_Lipschitz.m b/+forbes/+problems/@ProblemComposite/Get_Lipschitz.m similarity index 100% rename from 
@ProblemComposite/Get_Lipschitz.m rename to +forbes/+problems/@ProblemComposite/Get_Lipschitz.m diff --git a/@ProblemComposite/HessianQuadratic.m b/+forbes/+problems/@ProblemComposite/HessianQuadratic.m similarity index 64% rename from @ProblemComposite/HessianQuadratic.m rename to +forbes/+problems/@ProblemComposite/HessianQuadratic.m index ea6795f..a6d6209 100644 --- a/@ProblemComposite/HessianQuadratic.m +++ b/+forbes/+problems/@ProblemComposite/HessianQuadratic.m @@ -1,4 +1,4 @@ function Hx = HessianQuadratic(prob, x) -[~, grad] = prob.callf1(x); +[grad, ~] = prob.f1.gradient(x); Hx = grad-prob.q; diff --git a/@ProblemComposite/ProblemComposite.m b/+forbes/+problems/@ProblemComposite/ProblemComposite.m similarity index 75% rename from @ProblemComposite/ProblemComposite.m rename to +forbes/+problems/@ProblemComposite/ProblemComposite.m index 4b64af2..bb08106 100644 --- a/@ProblemComposite/ProblemComposite.m +++ b/+forbes/+problems/@ProblemComposite/ProblemComposite.m @@ -19,15 +19,11 @@ if isempty(f1) prob.istheref1 = false; else - if ~isfield(f1, 'isQuadratic') || ~f1.isQuadratic - error('function f1 must be quadratic'); - end - if ~isfield(f1, 'makef') - error('value/gradient of f1 is not defined (there is no makef)'); + if ~f1.is_quadratic() + error('f1 must be quadratic'); end prob.istheref1 = true; prob.f1 = f1; - prob.callf1 = f1.makef(); if isempty(C1) prob.m1 = prob.n; prob.isthereC1 = false; @@ -36,7 +32,7 @@ prob.m1 = [size(prob.C1, 1), 1]; prob.isthereC1 = true; end - [~, prob.q] = prob.callf1(zeros(prob.m1)); + [prob.q, ~] = f1.gradient(zeros(prob.m1)); prob.Q = @(x) prob.HessianQuadratic(x); if isempty(d1) prob.d1 = sparse(zeros(prob.m1)); @@ -47,15 +43,14 @@ if isempty(f2) prob.istheref2 = false; else - if isfield(f2, 'isQuadratic') && f2.isQuadratic + if f2.is_quadratic() error('you should provide f2 as f1, since it is quadratic'); end - if ~isfield(f2, 'makef') - error('value/gradient of f2 is not defined (there is no makef)'); + if 
~f2.is_smooth() + error('f2 must be smooth'); end prob.istheref2 = true; prob.f2 = f2; - prob.callf2 = f2.makef(); if isempty(C2) prob.m2 = prob.n; prob.isthereC2 = false; @@ -73,11 +68,7 @@ if isempty('g') error('missing g'); end - if ~isfield(g, 'makeprox') - error('the prox for the term g you specified is not available'); - end prob.g = g; - prob.callg = g.makeprox(); if isempty(D) prob.isthereD = false; else diff --git a/@ProblemComposite/isTightFrame.m b/+forbes/+problems/@ProblemComposite/isTightFrame.m similarity index 100% rename from @ProblemComposite/isTightFrame.m rename to +forbes/+problems/@ProblemComposite/isTightFrame.m diff --git a/+forbes/+solvers/@AbstractIterativeSolver/AbstractIterativeSolver.m b/+forbes/+solvers/@AbstractIterativeSolver/AbstractIterativeSolver.m new file mode 100644 index 0000000..eb20035 --- /dev/null +++ b/+forbes/+solvers/@AbstractIterativeSolver/AbstractIterativeSolver.m @@ -0,0 +1,26 @@ +classdef AbstractIterativeSolver < handle + properties + it, maxit, verbose + inittime, runtime + status, message + end + methods (Abstract) + initialize(obj, varargin) + iterate(obj) + solution(obj) + end + methods + function obj = AbstractIterativeSolver(varargin) + obj.maxit = varargin{1}; + obj.verbose = varargin{2}; + obj.status = -1; + obj.message = 'uninitialized'; + end + function display_header(obj) + fprintf('iter\n'); + end + function display_progress(obj) + fprintf('%d\n', obj.it); + end + end +end diff --git a/+forbes/+solvers/@AbstractIterativeSolver/get_iter.m b/+forbes/+solvers/@AbstractIterativeSolver/get_iter.m new file mode 100644 index 0000000..1114dbb --- /dev/null +++ b/+forbes/+solvers/@AbstractIterativeSolver/get_iter.m @@ -0,0 +1,3 @@ +function it = get_iter(obj) + it = obj.it; +end diff --git a/+forbes/+solvers/@AbstractIterativeSolver/get_runtime.m b/+forbes/+solvers/@AbstractIterativeSolver/get_runtime.m new file mode 100644 index 0000000..de53324 --- /dev/null +++ 
b/+forbes/+solvers/@AbstractIterativeSolver/get_runtime.m @@ -0,0 +1,3 @@ +function t = get_runtime(obj) + t = obj.runtime; +end diff --git a/+forbes/+solvers/@AbstractIterativeSolver/run.m b/+forbes/+solvers/@AbstractIterativeSolver/run.m new file mode 100644 index 0000000..bb14eee --- /dev/null +++ b/+forbes/+solvers/@AbstractIterativeSolver/run.m @@ -0,0 +1,24 @@ +function run(obj, varargin) + t0 = tic(); + obj.initialize(varargin{:}); + obj.it = 0; + obj.status = 1; + obj.message = 'exceeded max iter'; + obj.inittime = toc(t0); + if obj.verbose + obj.display_header(); + end + while obj.it < obj.maxit + stop = obj.iterate(); + obj.it = obj.it+1; + if obj.verbose + obj.display_progress(); + end + if stop + obj.status = 0; + obj.message = 'converged'; + break; + end + end + obj.runtime = toc(t0); +end diff --git a/+forbes/+solvers/@FBS/FBS.m b/+forbes/+solvers/@FBS/FBS.m new file mode 100644 index 0000000..d7bab0b --- /dev/null +++ b/+forbes/+solvers/@FBS/FBS.m @@ -0,0 +1,40 @@ +classdef FBS < forbes.solvers.AbstractIterativeSolver + properties + f1, A1, f2, A2, g, x0 + Lf, gam + x, z, z_prev + A1x, gradf1_A1x, f1_A1x + A2x, gradf2_A2x, f2_A2x + A1z_prev, gradf1_A1z_prev, A2z_prev + opt, adaptive + end + methods + function obj = FBS(varargin) + opt = struct(varargin{:}); + opt = forbes.solvers.FBS.defaults(opt); + obj@forbes.solvers.AbstractIterativeSolver(opt.maxit, opt.verbose); + obj.opt = opt; + end + function display_header(obj) + fprintf('%8s | %11s | %11s\n', 'iter', 'gam', 'fpr'); + end + function display_progress(obj) + fprintf('%8d | %8.5e | %8.5e\n', obj.it, obj.gam, norm(obj.x - obj.z, inf)); + end + end + methods (Static) + function opt = defaults(opt) + default_opt.verbose = false; + default_opt.maxit = 10000; + default_opt.tol = 1e-5; + default_opt.Lf = inf; + default_opt.fast = false; + default_fields = fieldnames(default_opt); + for i = 1:length(default_fields) + k = default_fields{i}; + v = getfield(default_opt, k); + if ~isfield(opt, k), opt = 
setfield(opt, k, v); end + end + end + end +end diff --git a/+forbes/+solvers/@FBS/initialize.m b/+forbes/+solvers/@FBS/initialize.m new file mode 100644 index 0000000..248a9ac --- /dev/null +++ b/+forbes/+solvers/@FBS/initialize.m @@ -0,0 +1,44 @@ +function initialize(obj, f1, A1, f2, A2, g, x0) + obj.f1 = f1; + obj.A1 = A1; + obj.f2 = f2; + obj.A2 = A2; + obj.g = g; + obj.x0 = x0; + + % compute (approximate) Lipschitz constant (if necessary) + + if ~isinf(obj.opt.Lf) + if isfield(obj.opt, 'adaptive') + obj.adaptive = obj.opt.adaptive; + else + obj.adaptive = false; + end + % nothing to compute if Lipschitz constant is provided by the user + obj.Lf = obj.opt.Lf; + else + if isfield(obj.opt, 'adaptive') + obj.adaptive = obj.opt.adaptive; + else + obj.adaptive = ~f2.is_null(); + end + if ~f2.is_null() + if ~f1.is_null() + f = forbes.functions.SeparableSum({f1, f2}, {size(A1, 1), size(A2, 1)}); + A = [A1; A2]; + obj.Lf = forbes.utils.lipschitz_lowbnd(f, A, x0); + else + obj.Lf = forbes.utils.lipschitz_lowbnd(f2, A2, x0); + end + elseif obj.adaptive + obj.Lf = forbes.utils.lipschitz_lowbnd(f1, A1, x0); + else + obj.Lf = forbes.utils.lipschitz_quadratic(f1, A1, x0); + end + end + + % set stepsize, initialize vectors + + obj.gam = 1.0/obj.Lf; + obj.x = obj.x0; +end diff --git a/+forbes/+solvers/@FBS/iterate.m b/+forbes/+solvers/@FBS/iterate.m new file mode 100644 index 0000000..9fff7e7 --- /dev/null +++ b/+forbes/+solvers/@FBS/iterate.m @@ -0,0 +1,75 @@ +function stop = iterate(obj) + if obj.it == 0 || ~obj.adaptive + obj.A1x = obj.A1*obj.x; + [obj.gradf1_A1x, obj.f1_A1x] = obj.f1.gradient(obj.A1x); + obj.A2x = obj.A2*obj.x; + end + [obj.gradf2_A2x, obj.f2_A2x] = obj.f2.gradient(obj.A2x); + At_gradf_Ax = obj.A1'*obj.gradf1_A1x + obj.A2'*obj.gradf2_A2x; + f_Ax = obj.f1_A1x + obj.f2_A2x; + obj.z_prev = obj.z; + [obj.z, ~] = obj.g.prox(obj.x - obj.gam*At_gradf_Ax, obj.gam); + + FPR_x = obj.x - obj.z; + normFPR_x = norm(FPR_x, 'fro'); + + uppbnd = f_Ax - 
At_gradf_Ax(:)'*FPR_x(:) + 0.5/obj.gam*normFPR_x^2; + + if obj.adaptive + for it_gam = 1:100 + A1z = obj.A1*obj.z; + [gradf1_A1z, f1_A1z] = obj.f1.gradient(A1z); + A2z = obj.A2*obj.z; + [gradf2_A2z, f2_A2z] = obj.f2.gradient(A2z); + f_Az = f1_A1z + f2_A2z; + if f_Az > uppbnd + 1e-6*abs(f_Ax) + obj.Lf = 2*obj.Lf; + obj.gam = 1/obj.Lf; + [obj.z, ~] = obj.g.prox(obj.x - obj.gam*At_gradf_Ax, obj.gam); % store in obj.z: the residual below and the next pass of this loop read obj.z + FPR_x = obj.x - obj.z; + normFPR_x = norm(FPR_x, 'fro'); + uppbnd = f_Ax - At_gradf_Ax(:)'*FPR_x(:) + 0.5/obj.gam*normFPR_x^2; + else + break; + end + end + end + + if norm(FPR_x, inf)/obj.gam <= obj.opt.tol + stop = true; + return; + else + stop = false; + end + + if obj.it == 0 || obj.opt.fast == false + obj.x = obj.z; + if obj.adaptive + obj.A1x = A1z; + obj.gradf1_A1x = gradf1_A1z; + obj.f1_A1x = f1_A1z; + obj.A2x = A2z; + obj.gradf2_A2x = gradf2_A2z; + obj.f2_A2x = f2_A2z; + obj.A1z_prev = A1z; + obj.gradf1_A1z_prev = gradf1_A1z; + obj.A2z_prev = A2z; + end + else + extr = obj.it/(obj.it+3); + obj.x = obj.z + extr*(obj.z - obj.z_prev); + if obj.adaptive + % extrapolate other extrapolable quantities + diff_A1x = extr*(A1z - obj.A1z_prev); + obj.A1x = A1z + diff_A1x; + diff_gradf1_A1x = extr*(gradf1_A1z - obj.gradf1_A1z_prev); + obj.gradf1_A1x = gradf1_A1z + diff_gradf1_A1x; + obj.f1_A1x = f1_A1z + gradf1_A1z(:)'*diff_A1x(:) + 0.5*(diff_A1x(:)'*diff_gradf1_A1x(:)); % both operands vectorised so matrix-shaped variables work + obj.A2x = A2z + extr*(A2z - obj.A2z_prev); + % store the z-quantities for future extrapolation + obj.A1z_prev = A1z; + obj.gradf1_A1z_prev = gradf1_A1z; + obj.A2z_prev = A2z; + end + end +end diff --git a/+forbes/+solvers/@FBS/solution.m b/+forbes/+solvers/@FBS/solution.m new file mode 100644 index 0000000..cdb3869 --- /dev/null +++ b/+forbes/+solvers/@FBS/solution.m @@ -0,0 +1,3 @@ +function x = solution(obj) + x = obj.z; +end diff --git a/+forbes/+solvers/@NAMA/NAMA.m b/+forbes/+solvers/@NAMA/NAMA.m new file mode 100644 index 0000000..4dd05b4 --- /dev/null +++ b/+forbes/+solvers/@NAMA/NAMA.m @@ -0,0 +1,45 @@
+classdef NAMA < forbes.solvers.AbstractIterativeSolver + properties + f1, A1, f2, A2, g, x0 + Lf, gam + xk, xbark + A1xk, gradf1_A1xk, f1_A1xk % these are useful in the adaptive case + A2xk, gradf2_A2xk, f2_A2xk % + Hk + opt, adaptive + num_lsfails + end + methods + function obj = NAMA(varargin) + opt = struct(varargin{:}); + opt = forbes.solvers.NAMA.defaults(opt); + obj@forbes.solvers.AbstractIterativeSolver(opt.maxit, opt.verbose); + obj.opt = opt; + obj.Hk = opt.method; + obj.num_lsfails = 0; + end + function display_header(obj) + fprintf('%8s | %11s | %11s\n', 'iter', 'gam', 'fpr'); + end + function display_progress(obj) + fprintf('%8d | %8.5e | %8.5e\n', obj.it, obj.gam, norm(obj.xk - obj.xbark, inf)); + end + end + methods (Static) + function opt = defaults(opt) + default_opt.verbose = false; + default_opt.maxit = 10000; + default_opt.tol = 1e-5; + default_opt.bet = 0.05; + default_opt.Lf = inf; + default_opt.method = forbes.operators.LBFGS(10); + default_opt.maxbacktrack = 10; + default_fields = fieldnames(default_opt); + for i = 1:length(default_fields) + k = default_fields{i}; + v = getfield(default_opt, k); + if ~isfield(opt, k), opt = setfield(opt, k, v); end + end + end + end +end diff --git a/+forbes/+solvers/@NAMA/initialize.m b/+forbes/+solvers/@NAMA/initialize.m new file mode 100644 index 0000000..7318b56 --- /dev/null +++ b/+forbes/+solvers/@NAMA/initialize.m @@ -0,0 +1,44 @@ +function initialize(obj, f1, A1, f2, A2, g, x0) + obj.f1 = f1; + obj.A1 = A1; + obj.f2 = f2; + obj.A2 = A2; + obj.g = g; + obj.x0 = x0; + + % compute (approximate) Lipschitz constant (if necessary) + + if ~isinf(obj.opt.Lf) + if isfield(obj.opt, 'adaptive') + obj.adaptive = obj.opt.adaptive; + else + obj.adaptive = false; + end + % nothing to compute if Lipschitz constant is provided by the user + obj.Lf = obj.opt.Lf; + else + if isfield(obj.opt, 'adaptive') + obj.adaptive = obj.opt.adaptive; + else + obj.adaptive = ~f2.is_null(); + end + if ~f2.is_null() + if ~f1.is_null() 
+ f = forbes.functions.SeparableSum({f1, f2}, {size(A1, 1), size(A2, 1)}); + A = [A1; A2]; + obj.Lf = forbes.utils.lipschitz_lowbnd(f, A, x0); + else + obj.Lf = forbes.utils.lipschitz_lowbnd(f2, A2, x0); + end + elseif obj.adaptive + obj.Lf = forbes.utils.lipschitz_lowbnd(f1, A1, x0); + else + obj.Lf = forbes.utils.lipschitz_quadratic(f1, A1, x0); + end + end + + % set stepsize, initialize vectors + + obj.gam = (1-obj.opt.bet)/obj.Lf; + obj.xk = obj.x0; +end diff --git a/+forbes/+solvers/@NAMA/iterate.m b/+forbes/+solvers/@NAMA/iterate.m new file mode 100644 index 0000000..bf0f7a1 --- /dev/null +++ b/+forbes/+solvers/@NAMA/iterate.m @@ -0,0 +1,183 @@ +function stop = iterate(obj) + if obj.it == 0 || ~obj.adaptive + obj.A1xk = obj.A1*obj.xk; + [obj.gradf1_A1xk, obj.f1_A1xk] = obj.f1.gradient(obj.A1xk); + obj.A2xk = obj.A2*obj.xk; + [obj.gradf2_A2xk, obj.f2_A2xk] = obj.f2.gradient(obj.A2xk); + end + At_gradf_Axk = obj.A1'*obj.gradf1_A1xk + obj.A2'*obj.gradf2_A2xk; + f_Axk = obj.f1_A1xk + obj.f2_A2xk; + [obj.xbark, g_xbark] = obj.g.prox(obj.xk - obj.gam*At_gradf_Axk, obj.gam); + + FPR_xk = obj.xk - obj.xbark; + normFPR_xk = norm(FPR_xk, 'fro'); + + uppbnd = f_Axk - At_gradf_Axk(:)'*FPR_xk(:) + 0.5/obj.gam*normFPR_xk^2; + + reset = false; + + if obj.adaptive + for it_gam = 1:100 + A1xbark = obj.A1*obj.xbark; + A2xbark = obj.A2*obj.xbark; + [gradf1_A1xbark, f1_A1xbark] = obj.f1.gradient(A1xbark); + [gradf2_A2xbark, f2_A2xbark] = obj.f2.gradient(A2xbark); + f_Axbark = f1_A1xbark + f2_A2xbark; + if f_Axbark > uppbnd + 1e-6*abs(f_Axk) + reset = true; + obj.Lf = 2*obj.Lf; + obj.gam = (1-obj.opt.bet)/obj.Lf; + [obj.xbark, g_xbark] = obj.g.prox(obj.xk - obj.gam*At_gradf_Axk, obj.gam); + FPR_xk = obj.xk - obj.xbark; + normFPR_xk = norm(FPR_xk, 'fro'); + uppbnd = f_Axk - At_gradf_Axk(:)'*FPR_xk(:) + 0.5/obj.gam*normFPR_xk^2; + else + break; + end + end + else + A1xbark = 0.0; + A2xbark = 0.0; + end + + if norm(FPR_xk, inf)/obj.gam <= obj.opt.tol + stop = true; + return; + else 
+ stop = false; + end + + FBE_xk = uppbnd + g_xbark; + + % Compute direction + + if reset == true + obj.Hk.reset(); + end + + dk = -(obj.Hk*FPR_xk); + + % Perform backtracking: this looks messy, but it's really simple + % The mess comes from optimizing the calls to A1, A2 and f1.gradient, + % and from having to test obj.gam (in the adaptive case). + + tau = 1.0; + + xkdk = obj.xk + dk; + A1xkdk = obj.A1xk + obj.A1*dk; + A2xkdk = obj.A2xk + obj.A2*dk; + [gradf1_A1xkdk, f1_A1xkdk] = obj.f1.gradient(A1xkdk); + A1t_gradf1_A1xkdk = obj.A1'*gradf1_A1xkdk; + + lin_coeff = 0.0; + quad_coeff = 0.0; + A1t_gradf1_A1xbark = 0.0; + + for lsit = 1:obj.opt.maxbacktrack + wk = (1-tau)*obj.xbark + tau*xkdk; + A2wk = (1-tau)*A2xbark + tau*A2xkdk; + A1t_gradf1_A1wk = (1-tau)*A1t_gradf1_A1xbark + tau*A1t_gradf1_A1xkdk; + + % Explanation of next (and some of previous) line(s). + % + % Function f1 is quadratic, therefore the following expansion is exact: + % + % f1(x+y) = f1(x) + f1'(x)*y + 0.5*f1"*||y||^2. + % + % In this case, we want to compute f1 at A1wk, and to do so we can + % expand f1 around A1xkdk. In fact + % + % A1wk = (1-tau)*A1xbark + tau*A1xkdk + % = A1xkdk + (1-tau)*(A1xbark - A1xkdk). + % + % Therefore at every backtracking iteration (i.e. for every tau) + % + % f1(A1wk) = f1(A1xkdk) + f1'(A1xkdk)*(1-tau)*(A1xbark - A1xkdk) + % + 0.5*f1"*(1-tau)^2*||A1xbark - A1xkdk||^2. + % + % How do we compute f1"? Well, for any two points x, y, one has + % + % f1"*(x-y) = f1'(x) - f1'(y). 
+ + f1_A1wk = f1_A1xkdk + lin_coeff*(1-tau) + quad_coeff*(1-tau)^2; + [gradf2_A2wk, f2_A2wk] = obj.f2.gradient(A2wk); + A2t_gradf2_A2wk = obj.A2'*gradf2_A2wk; + At_gradf_Awk = A1t_gradf1_A1wk + A2t_gradf2_A2wk; + f_Awk = f1_A1wk + f2_A2wk; + [wbark, g_wbark] = obj.g.prox(wk - obj.gam*At_gradf_Awk, obj.gam); + FPR_wk = wk - wbark; + normFPR_wk = norm(FPR_wk, 'fro'); + reset = false; + uppbnd = f_Awk - At_gradf_Awk(:)'*FPR_wk(:) + 0.5/obj.gam*normFPR_wk^2; + + reset = false; + if obj.adaptive + for it_gam = 1:100 + A1wbark = obj.A1*wbark; + A2wbark = obj.A2*wbark; + [gradf1_A1wbark, f1_A1wbark] = obj.f1.gradient(A1wbark); + [gradf2_A2wbark, f2_A2wbark] = obj.f2.gradient(A2wbark); + f_Awbark = f1_A1wbark + f2_A2wbark; + if f_Awbark > uppbnd + 1e-6*abs(f_Awk) + reset = true; + obj.Lf = 2*obj.Lf; + obj.gam = (1-obj.opt.bet)/obj.Lf; + [wbark, g_wbark] = obj.g.prox(wk - obj.gam*At_gradf_Awk, obj.gam); % wk and wbark are locals of this function, not properties of obj + FPR_wk = wk - wbark; + normFPR_wk = norm(FPR_wk, 'fro'); + uppbnd = f_Awk - At_gradf_Awk(:)'*FPR_wk(:) + 0.5/obj.gam*normFPR_wk^2; + else + break; + end + end + end + if reset == true, break; end + + FBE_wk = uppbnd + g_wbark; + if FBE_wk <= FBE_xk + xk_backup = obj.xk; + obj.xk = wbark; + if obj.adaptive + obj.A1xk = A1wbark; + obj.gradf1_A1xk = gradf1_A1wbark; + obj.f1_A1xk = f1_A1wbark; + obj.A2xk = A2wbark; + obj.gradf2_A2xk = gradf2_A2wbark; + obj.f2_A2xk = f2_A2wbark; + end + break; + end + if lsit == 1 + if ~obj.adaptive % otherwise we have already computed this stuff + A1xbark = obj.A1*obj.xbark; + A2xbark = obj.A2*obj.xbark; + [gradf1_A1xbark, ~] = obj.f1.gradient(A1xbark); + end + A1t_gradf1_A1xbark = obj.A1'*gradf1_A1xbark; + temp1 = A1xbark - A1xkdk; + lin_coeff = gradf1_A1xkdk(:)'*temp1(:); + temp2 = gradf1_A1xbark - gradf1_A1xkdk; + quad_coeff = 0.5*(temp1(:)'*temp2(:)); + end + if lsit == obj.opt.maxbacktrack + xk_backup = obj.xk; + obj.xk = obj.xbark; + if obj.adaptive + obj.A1xk = A1xbark; + obj.gradf1_A1xk = gradf1_A1xbark; + obj.f1_A1xk =
f1_A1xbark; + obj.A2xk = A2xbark; + obj.gradf2_A2xk = gradf2_A2xbark; + obj.f2_A2xk = f2_A2xbark; + end + obj.num_lsfails = obj.num_lsfails + 1; + end + tau = 0.5*tau; + end + + if reset == true + obj.Hk.reset(); + else + obj.Hk.push(wk - xk_backup, FPR_wk - FPR_xk); + end +end diff --git a/+forbes/+solvers/@NAMA/solution.m b/+forbes/+solvers/@NAMA/solution.m new file mode 100644 index 0000000..4a5e907 --- /dev/null +++ b/+forbes/+solvers/@NAMA/solution.m @@ -0,0 +1,3 @@ +function x = solution(obj) + x = obj.xbark; +end diff --git a/+forbes/+tests/test_lasso_random.m b/+forbes/+tests/test_lasso_random.m new file mode 100644 index 0000000..4ebb513 --- /dev/null +++ b/+forbes/+tests/test_lasso_random.m @@ -0,0 +1,25 @@ +rng(0); + +m = 50; +n = 200; + +A = randn(m, n); +b = randn(m, 1); + +f = forbes.functions.SqrNormL2(); +aff = {A, -b}; +lam = 0.3*norm(A'*b, 'inf'); +g = forbes.functions.NormL1(lam); +x0 = zeros(n, 1); + +TOL = 1e-8; + +sol_FBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL)); + +x_star = sol_FBS.solution(); + +sol_AFBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL, 'fast', true)); +assert(norm(sol_AFBS.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); + +sol_NAMA = forbes(f, g, x0, aff, {}, forbes.solvers.NAMA('tol', TOL)); +assert(norm(sol_NAMA.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); diff --git a/+forbes/+tests/test_lasso_small.m b/+forbes/+tests/test_lasso_small.m new file mode 100644 index 0000000..8d40c83 --- /dev/null +++ b/+forbes/+tests/test_lasso_small.m @@ -0,0 +1,29 @@ +rng(0); + +A = [1, 2, -1, -1; ... + -2, -1, 0, -1; ... + 3, 0, 4, -1; ... + -4, -1, -3, 1; ... 
+ 5, 3, 2, 3]'; +b = [1, 2, 3, 4]'; + +[m, n] = size(A); + +f = forbes.functions.SqrNormL2(); +aff = {A, -b}; +lam = 0.1*norm(A'*b, 'inf'); +g = forbes.functions.NormL1(lam); +x0 = zeros(n, 1); + +x_star = [-3.877278911564627e-01; 0; 0; 2.174149659863943e-02; 6.168435374149660e-01]; + +TOL = 1e-8; + +sol_FBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL)); +assert(norm(sol_FBS.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); + +sol_AFBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL, 'fast', true)); +assert(norm(sol_AFBS.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); + +sol_NAMA = forbes(f, g, x0, aff, {}, forbes.solvers.NAMA('tol', TOL)); +assert(norm(sol_NAMA.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); diff --git a/+forbes/+tests/test_lqrCost.m b/+forbes/+tests/test_lqrCost.m new file mode 100644 index 0000000..c1ece38 --- /dev/null +++ b/+forbes/+tests/test_lqrCost.m @@ -0,0 +1 @@ +% TODO diff --git a/+forbes/+tests/test_mpc.m b/+forbes/+tests/test_mpc.m new file mode 100644 index 0000000..c1ece38 --- /dev/null +++ b/+forbes/+tests/test_mpc.m @@ -0,0 +1 @@ +% TODO diff --git a/+forbes/+tests/test_nn_matcomp_random.m b/+forbes/+tests/test_nn_matcomp_random.m new file mode 100644 index 0000000..9a5e402 --- /dev/null +++ b/+forbes/+tests/test_nn_matcomp_random.m @@ -0,0 +1,31 @@ +rng(0); + +m = 30; % number of rows +n = 30; % number of column of the original matrix M +d = 0.5; % density of coefficients sampled from M +r = 3; % rank of M + +U = randn(m, r); +V = randn(n, r); +M = U*V'; + +P = sprand(m, n, d) ~= 0; % sampling pattern +B = full(M.*P); + +f = forbes.functions.SqrNormL2(P); +aff = {1, -B}; +lam = 2; +g = forbes.functions.NuclearNorm(lam, 'exact'); +x0 = zeros(m, n); + +TOL = 1e-8; + +sol_FBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL)); + +x_star = sol_FBS.solution(); + +sol_AFBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL, 'fast', true)); 
+assert(norm(sol_AFBS.solution - x_star, 'fro') <= 10*TOL*norm(x_star, 'fro')); + +sol_NAMA = forbes(f, g, x0, aff, {}, forbes.solvers.NAMA('tol', TOL)); +assert(norm(sol_NAMA.solution - x_star, 'fro') <= 10*TOL*norm(x_star, 'fro')); diff --git a/+forbes/+tests/test_nn_matcomp_small.m b/+forbes/+tests/test_nn_matcomp_small.m new file mode 100644 index 0000000..0f2d46e --- /dev/null +++ b/+forbes/+tests/test_nn_matcomp_small.m @@ -0,0 +1,23 @@ +rng(0); + +B = % TODO: put something here +P = B ~= 0; + +f = forbes.functions.SqrNormL2(P); +aff = {1, -B}; +lam = % TODO: put something here +g = forbes.functions.NuclearNorm(lam, 'exact'); +x0 = zeros(m, n); + +x_star = % TODO: put something here + +TOL = 1e-8; + +sol_FBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL)); +assert(norm(sol_FBS.solution() - x_star, 'fro') <= 10*TOL*norm(x_star, 'fro')); + +sol_AFBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL, 'fast', true)); +assert(norm(sol_AFBS.solution() - x_star, 'fro') <= 10*TOL*norm(x_star, 'fro')); + +sol_NAMA = forbes(f, g, x0, aff, {}, forbes.solvers.NAMA('tol', TOL)); +assert(norm(sol_NAMA.solution() - x_star, 'fro') <= 10*TOL*norm(x_star, 'fro')); diff --git a/+forbes/+tests/test_qp.m b/+forbes/+tests/test_qp.m new file mode 100644 index 0000000..c1ece38 --- /dev/null +++ b/+forbes/+tests/test_qp.m @@ -0,0 +1 @@ +% TODO diff --git a/+forbes/+tests/test_sparse_logreg.m b/+forbes/+tests/test_sparse_logreg.m new file mode 100644 index 0000000..40dee8f --- /dev/null +++ b/+forbes/+tests/test_sparse_logreg.m @@ -0,0 +1,29 @@ +rng(0); + +A = [1, 2, -1, -1; ... + -2, -1, 0, -1; ... + 3, 0, 4, -1; ... + -4, -1, -3, 1; ... 
+ 5, 3, 2, 3]'; +b = [1, 2, 3, 4]'; + +[m, n] = size(A); + +f = forbes.functions.LogisticLoss(1.0); +aff = {A, -b}; +lam = 0.1; +g = forbes.functions.NormL1(lam); +x0 = zeros(n, 1); + +x_star = [0; 0; 2.114635341704963e-01; 0; 2.845881348733116e+00]; + +TOL = 1e-8; + +sol_FBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL)); +assert(norm(sol_FBS.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); + +sol_AFBS = forbes(f, g, x0, aff, {}, forbes.solvers.FBS('tol', TOL, 'fast', true)); +assert(norm(sol_AFBS.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); + +sol_NAMA = forbes(f, g, x0, aff, {}, forbes.solvers.NAMA('tol', TOL)); +assert(norm(sol_NAMA.solution() - x_star, 'inf') <= 10*TOL*norm(x_star, 'inf')); diff --git a/library/RiccatiSolve.c b/+forbes/+utils/RiccatiSolve.c old mode 100755 new mode 100644 similarity index 100% rename from library/RiccatiSolve.c rename to +forbes/+utils/RiccatiSolve.c diff --git a/+forbes/+utils/RiccatiSolve.mexmaci64 b/+forbes/+utils/RiccatiSolve.mexmaci64 new file mode 100755 index 0000000..d421e2f Binary files /dev/null and b/+forbes/+utils/RiccatiSolve.mexmaci64 differ diff --git a/+forbes/+utils/lbfgs_mex.c b/+forbes/+utils/lbfgs_mex.c new file mode 100644 index 0000000..5ee0e18 --- /dev/null +++ b/+forbes/+utils/lbfgs_mex.c @@ -0,0 +1,91 @@ +#include "mex.h" +#include "libLBFGS.h" + +#define IS_REAL_SPARSE_MAT(P) (mxGetNumberOfDimensions(P) == 2 && \ + mxIsSparse(P) && mxIsDouble(P)) +#define IS_REAL_DENSE_MAT(P) (mxGetNumberOfDimensions(P) == 2 && \ + !mxIsSparse(P) && mxIsDouble(P)) +#define IS_REAL_DENSE_VEC(P) ((mxGetNumberOfDimensions(P) == 1 || \ + (mxGetNumberOfDimensions(P) == 2 && (mxGetN(P) == 1 || mxGetM(P) == 1))) && \ + !mxIsSparse(P) && mxIsDouble(P)) +#define IS_INT32_DENSE_VEC(P) ((mxGetNumberOfDimensions(P) == 1 || \ + (mxGetNumberOfDimensions(P) == 2 && (mxGetN(P) == 1 || mxGetM(P) == 1))) && \ + !mxIsSparse(P) && mxIsInt32(P)) +#define IS_REAL_SCALAR(P) (IS_REAL_DENSE_VEC(P) && 
mxGetNumberOfElements(P) == 1) +#define IS_INT32_SCALAR(P) (IS_INT32_DENSE_VEC(P) && mxGetNumberOfElements(P) == 1) + +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) +{ + int n, mem, curridx, currmem; mwSize dir_dims[2]; /* dims handed to mxCreateNumericArray must be mwSize, not int */ + double * dir, * s, * y, * ys, H, * g, * alpha; + + if (nrhs != 7) { + mexErrMsgTxt("lbfgs_mex: you should provide exactly 7 arguments."); + return; + } + if (nlhs > 1) { + mexErrMsgTxt("lbfgs_mex: too many output arguments."); + return; + } + if (!IS_REAL_DENSE_MAT(prhs[0])) { + mexErrMsgTxt("lbfgs_mex: 1st argument must be a double, dense matrix."); + return; + } + if (!IS_REAL_DENSE_MAT(prhs[1])) { + mexErrMsgTxt("lbfgs_mex: 2nd argument must be a double, dense matrix."); + return; + } + if (!IS_REAL_DENSE_VEC(prhs[2])) { + mexErrMsgTxt("lbfgs_mex: 3rd argument must be a double, dense vector."); + return; + } + if (!IS_REAL_SCALAR(prhs[3])) { + mexErrMsgTxt("lbfgs_mex: 4th argument must be a double scalar."); + return; + } + if (!IS_REAL_DENSE_VEC(prhs[4])) { + mexErrMsgTxt("lbfgs_mex: 5th argument must be a double, dense vector."); + return; + } + if (!IS_INT32_SCALAR(prhs[5])) { + mexErrMsgTxt("lbfgs_mex: 6th argument must be a 32-bit integer."); + return; + } + if (!IS_INT32_SCALAR(prhs[6])) { + mexErrMsgTxt("lbfgs_mex: 7th argument must be a 32-bit integer."); + return; + } + + s = mxGetPr(prhs[0]); + y = mxGetPr(prhs[1]); + ys = mxGetPr(prhs[2]); + H = mxGetScalar(prhs[3]); + g = mxGetPr(prhs[4]); + curridx = (int)mxGetScalar(prhs[5])-1; + currmem = (int)mxGetScalar(prhs[6]); + + n = mxGetDimensions(prhs[0])[0]; + mem = mxGetDimensions(prhs[0])[1]; + dir_dims[0] = n; + dir_dims[1] = 1; + + alpha = mxCalloc(mem, sizeof(double)); + + dir_dims[0] = n; + dir_dims[1] = 1; + plhs[0] = mxCreateNumericArray(2, dir_dims, mxDOUBLE_CLASS, mxREAL); + dir = mxGetPr(plhs[0]); + + libLBFGS_buffer b; + b.n = n; + b.mem = mem; + b.currmem = currmem; + b.curridx = curridx; + b.s_n_m = s; + b.y_n_m = y; + b.ys_m = ys; + b.alpha_m =
alpha; + libLBFGS_matvec(&b, H, g, dir); + + mxFree(alpha); +} diff --git a/+forbes/+utils/lbfgs_mex.mexmaci64 b/+forbes/+utils/lbfgs_mex.mexmaci64 new file mode 100755 index 0000000..f3a65bf Binary files /dev/null and b/+forbes/+utils/lbfgs_mex.mexmaci64 differ diff --git a/+forbes/+utils/libLBFGS.c b/+forbes/+utils/libLBFGS.c new file mode 100644 index 0000000..6c5d56b --- /dev/null +++ b/+forbes/+utils/libLBFGS.c @@ -0,0 +1,95 @@ +#include <stdlib.h> +#include "libLBFGS.h" + +libLBFGS_buffer * libLBFGS_init(int n, int mem) { + libLBFGS_buffer * b = (libLBFGS_buffer *)malloc(sizeof(libLBFGS_buffer)); + b->n = n; + b->mem = mem; + b->currmem = 0; + b->curridx = -1; + b->s_n_m = (double *)malloc(mem*n*sizeof(double)); + b->y_n_m = (double *)malloc(mem*n*sizeof(double)); + b->ys_m = (double *)malloc(mem*sizeof(double)); + b->alpha_m = (double *)malloc(mem*sizeof(double)); + return b; +} + +int libLBFGS_push(libLBFGS_buffer * b, double * s, double * y) { + int i, base; + b->curridx += 1; + if (b->curridx >= b->mem) b->curridx = 0; + b->currmem += 1; + if (b->currmem > b->mem) b->currmem = b->mem; + base = b->curridx*b->n; + b->ys_m[b->curridx] = 0.0; /* slot is malloc'ed and reused on wrap-around: zero it before accumulating y's */ + for (i=0; i<b->n; i++) { + b->s_n_m[base+i] = s[i]; + b->y_n_m[base+i] = y[i]; + b->ys_m[b->curridx] += s[i]*y[i]; + } + return 0; +} + +int libLBFGS_matvec(libLBFGS_buffer * b, double H, double * g_n, double * dir_n) { + double beta; + int i, j, k; + int n = b->n; + int mem = b->mem; + int currmem = b->currmem; + int curridx = b->curridx; + double * s_n_m = b->s_n_m; + double * y_n_m = b->y_n_m; + double * ys_m = b->ys_m; + double * alpha_m = b->alpha_m; + + for (j=0; j=mem) i = 0; + for (k=0; k=mem) i = 0; + } + return 0; +} + +int libLBFGS_reset(libLBFGS_buffer * b) { + b->currmem = 0; + b->curridx = -1; + return 0; +} + +int libLBFGS_clear(libLBFGS_buffer * b) { + free(b->s_n_m); + free(b->y_n_m); + free(b->ys_m); + free(b->alpha_m); + free(b); + return 0; +} diff --git a/+forbes/+utils/libLBFGS.h b/+forbes/+utils/libLBFGS.h new file mode 100644 index
0000000..8d92371 --- /dev/null +++ b/+forbes/+utils/libLBFGS.h @@ -0,0 +1,62 @@ +typedef struct { + int n; + int mem; + int currmem; + int curridx; + double * s_n_m; + double * y_n_m; + double * ys_m; + double * alpha_m; +} libLBFGS_buffer; + +/* + Initialize a libLBFGS_buffer structure. + Parameters: + int n: dimension of the vectors to handle + int mem: memory of the buffer + Return value: + libLBFGS_buffer * b: pointer to the buffer structure +*/ +libLBFGS_buffer * libLBFGS_init (int n, int mem); + +/* + Pushes a new pair (s, y) into the buffer, and discards the oldest one if + necessary. + Parameters: + libLBFGS_buffer * b: pointer to the buffer structure + double * s: pointer to n-dimensional double vector, containing the + difference between two points x and x' + double * y: pointer to n-dimensional double vector, containing the + difference between the two gradients at x and x' + Return value: + int status: some integer status code +*/ +int libLBFGS_push (libLBFGS_buffer * b, double * s, double * y); + +/* + Performs a MATVEC operation between the inverse Hessian approximation stored + so far and a given vector. + Parameters: + libLBFGS_buffer * b: pointer to the buffer structure + double * g: pointer to n-dimensional double vector, containing the + vector to multiply with the inverse Hessian approximation + double * d: pointer to n-dimensional double vector, where to store the + result of the matrix-vector product. +*/ +int libLBFGS_matvec (libLBFGS_buffer * b, double H, double * g, double * d); + +/* + Resets the memory of the given buffer structure. + Parameters: + libLBFGS_buffer * b: pointer to the buffer structure +*/ +int libLBFGS_reset (libLBFGS_buffer * b); + +/* + Deallocates the given buffer structure: all the referenced memory locations + will be cleared, including the buffer itself, and the pointer will no longer + be usable.
+ Parameters: + libLBFGS_buffer * b: pointer to the buffer structure +*/ +int libLBFGS_clear (libLBFGS_buffer * b); diff --git a/+forbes/+utils/lipschitz_lowbnd.m b/+forbes/+utils/lipschitz_lowbnd.m new file mode 100644 index 0000000..2b9aef2 --- /dev/null +++ b/+forbes/+utils/lipschitz_lowbnd.m @@ -0,0 +1,7 @@ +function Lf = lipschitz_lowbnd(f, A, x) % declared name now matches the file name and every call site + delta = max(1e-12, x*1e-6); + y = x + delta; + [gradf_Ax, ~] = f.gradient(A*x); + [gradf_Ay, ~] = f.gradient(A*y); + Lf = norm(A'*(gradf_Ax - gradf_Ay), 'fro')/norm(delta, 'fro'); +end diff --git a/+forbes/+utils/lipschitz_quadratic.m b/+forbes/+utils/lipschitz_quadratic.m new file mode 100644 index 0000000..ef0d911 --- /dev/null +++ b/+forbes/+utils/lipschitz_quadratic.m @@ -0,0 +1,8 @@ +function Lf = lipschitz_quadratic(f, A, x) + sizex = size(x); + gradf_A0 = f.gradient(A*zeros(sizex)); + eigsOpt.issym = 1; + eigsOpt.tol = 1e-3; + funHessian = @(x) forbes.utils.vec(A'*(f.gradient(A*reshape(x, sizex))-gradf_A0)); % vec lives in +forbes/+utils, so the call must be package-qualified + Lf = eigs(funHessian, prod(sizex), 1, 'LM', eigsOpt); +end diff --git a/utils/numdiff.m b/+forbes/+utils/numdiff.m similarity index 100% rename from utils/numdiff.m rename to +forbes/+utils/numdiff.m diff --git a/utils/vec.m b/+forbes/+utils/vec.m similarity index 100% rename from utils/vec.m rename to +forbes/+utils/vec.m diff --git a/forbes_lasso.m b/+forbes/+wrappers/lasso.m similarity index 92% rename from forbes_lasso.m rename to +forbes/+wrappers/lasso.m index 2b45f7b..dcfb274 100644 --- a/forbes_lasso.m +++ b/+forbes/+wrappers/lasso.m @@ -1,11 +1,11 @@ -% FORBES_LASSO +% LASSO % -% FORBES_LASSO(A, b, lam, opt) solves the problem +% LASSO(A, b, lam, opt) solves the problem % % minimize (1/2)||Ax-b||^2 + lam*sum(abs(x)) % -function out = forbes_lasso(A, b, lam0, opt) +function out = lasso(A, b, lam0, opt) % if options are not given if nargin < 4, opt = struct(); end diff --git a/forbes_linear_mpc.m b/+forbes/+wrappers/linear_mpc.m similarity index 80% rename from forbes_linear_mpc.m rename to
+forbes/+wrappers/linear_mpc.m index 7da546d..4be12cf 100644 --- a/forbes_linear_mpc.m +++ b/+forbes/+wrappers/linear_mpc.m @@ -1,6 +1,6 @@ -% FORBES_LINEAR_MPC +% LINEAR_MPC % -% FORBES_LINEAR_MPC(mpc_prob, opt) solves +% LINEAR_MPC(mpc_prob, opt) solves % the linear model predictive control problem % % min. 0.5*sum((x[k]-xref)'*Q*(x[k]-xref) + u[k]'*R*u[k], k=0,...,N-1) [stage cost] @@ -16,29 +16,28 @@ % s.t. x[0] = x0 % % x[k+1] = A x[k] + B u[k], k = 0,...,N-1 [dynamics] -% +% % mpc_prob is a structure containing the following problem parameters: -% +% % mpc_prob.x0, mpc_prob.xref, mpc_prob.Q, mpc_prob.R, mpc_prob.Q_N % mpc_prob.A, mpc_prob.B, mpc_prob.N, mpc_prob.L_s, mpc_prob.L_N -% +% % penalty functions g_s, g_N are determined as follows: -% -% mpc_prob.s_min, mpc_prob.s_max: lower/upper bound on the stage -% mpc_prob.x_N_min, mpc_prob.x_N_max: lower/upper bound on final state -% +% +% mpc_prob.s_min, mpc_prob.s_max: lower/upper bound on the stage +% mpc_prob.x_N_min, mpc_prob.x_N_max: lower/upper bound on final state +% % and -% +% % mpc_prob.stage_w = [w_1, ..., w_{m_s}], the weights to apply % to the linear penalty for each constraint violation (+inf: hard % constraint) -% +% % mpc_prob.final_w = [w_1, ..., w_{m_N}], analogous to the % previous case -% +% -% function out = forbes_linear_mpc(x0, xref, Q, R, Q_N, A, B, N, g, L, opt, out_prev) -function out = forbes_linear_mpc(mpc_prob, opt, out_prev) +function out = linear_mpc(mpc_prob, opt, out_prev) t0 = tic(); @@ -49,7 +48,7 @@ end if ~exist('opt','var'), opt = []; end - + if ~isfield(opt,'prescale') || isempty(opt.prescale) opt.prescale = 1; end @@ -60,25 +59,30 @@ n_u = size(mpc_prob.B, 2); m_stage = size(mpc_prob.L_s, 1); - + % Make objective term f - if ~isfield(mpc_prob, 'xref') || isempty(mpc_prob.xref) - f = lqrCost(mpc_prob.x0, mpc_prob.Q, mpc_prob.R, mpc_prob.Q_N, ... 
+ if nargin >= 3 && ~isempty(out_prev) + f = out_prev.forbes.prob.f1.f; + f.set_x0(mpc_prob.x0); + elseif ~isfield(mpc_prob, 'xref') || isempty(mpc_prob.xref) + f = forbes.functions.LQRCost(mpc_prob.x0, ... + mpc_prob.Q, mpc_prob.R, mpc_prob.Q_N, ... mpc_prob.A, mpc_prob.B, mpc_prob.N); mpc_prob.xref = zeros(n_x, 1); else - f = lqrCost(mpc_prob.x0, mpc_prob.Q, mpc_prob.R, mpc_prob.Q_N, ... + f = forbes.functions.LQRCost(mpc_prob.x0, ... + mpc_prob.Q, mpc_prob.R, mpc_prob.Q_N, ... mpc_prob.A, mpc_prob.B, mpc_prob.N, mpc_prob.xref); end % Build big constraint matrix - + if isfield(mpc_prob, 'x_N_ellipse') mpc_prob.L_N = mpc_prob.x_N_ellipse{1}; alpha = mpc_prob.x_N_ellipse{2}; end - + m_final = size(mpc_prob.L_N, 1); diag_L = {}; @@ -87,15 +91,15 @@ end diag_L{mpc_prob.N+1} = mpc_prob.L_N; L = sparse(blkdiag(diag_L{:})); % ugly - + % Compute scaling if opt.prescale scale = zeros(size(L, 1), 1); - callfconj = f.makefconj(); - [~, p] = callfconj(zeros(size(L, 2),1)); + fc = forbes.functions.Conjugate(f); + [p, ~] = fc.gradient(zeros(size(L, 2),1)); for i = 1:size(L, 1) - [~, dgradi] = callfconj(L(i, :)'); + [dgradi, ~] = fc.gradient(L(i, :)'); w = L(i, :)*(dgradi-p); if w >= 1e-14 scale(i) = 1/sqrt(w); @@ -115,18 +119,18 @@ xu_max = [xu_max; mpc_prob.s_max]; w = [w; mpc_prob.stage_w]; end - + if isfield(mpc_prob, 'x_N_ellipse') % Case where ellipsoidal final constraint is selected xu_min_scaled = scale(1:mpc_prob.N*m_stage).*xu_min; xu_max_scaled = scale(1:mpc_prob.N*m_stage).*xu_max; w_scaled = w./scale(1:mpc_prob.N*m_stage); - g_s = distBox(xu_min_scaled, xu_max_scaled, w_scaled); + g_s = forbes.functions.DistBoxL1(xu_min_scaled, xu_max_scaled, w_scaled); % do not scale final constraint scale_N = mean(scale(end-m_final+1:end)); scale(end-m_final+1:end) = scale_N; - g_N = indBall_l2(scale_N*sqrt(2*alpha), scale_N*mpc_prob.xref); - g = separableSum({g_s, g_N}, {mpc_prob.N*m_stage, n_x}); + g_N = forbes.functions.IndBallL2(scale_N*sqrt(2*alpha), scale_N*mpc_prob.xref); 
+ g = forbes.functions.SeparableSum({g_s, g_N}, {mpc_prob.N*m_stage, n_x}); else % Case where ordinary (soft/hard) final constraint is selected xu_min = [xu_min; mpc_prob.x_N_min]; @@ -135,9 +139,9 @@ xu_min_scaled = scale.*xu_min; xu_max_scaled = scale.*xu_max; w_scaled = w./scale; - g = distBox(xu_min_scaled, xu_max_scaled, w_scaled); + g = forbes.functions.DistBoxL1(xu_min_scaled, xu_max_scaled, w_scaled); end - + L_scaled = sparse(diag(scale))*L; % Now the problem to solve is @@ -146,7 +150,7 @@ % Set starting (dual) point - if ~exist('out_prev', 'var') || isempty(out_prev) + if nargin < 3 || isempty(out_prev) y0 = zeros(size(L_scaled, 1), 1); else y0 = out_prev.y; diff --git a/forbes_qp.m b/+forbes/+wrappers/qp.m similarity index 95% rename from forbes_qp.m rename to +forbes/+wrappers/qp.m index a917ecb..922d4d5 100644 --- a/forbes_qp.m +++ b/+forbes/+wrappers/qp.m @@ -1,6 +1,6 @@ -% FORBES_QP +% QP % -% FORBES_QP(H, q, A, lb, ub, Aeq, beq, lx, ux, opt, out1) solves the +% QP(H, q, A, lb, ub, Aeq, beq, lx, ux, opt, out1) solves the % quadratic programming problem % % minimize (1/2)*x'*H*x + q'*x @@ -12,7 +12,7 @@ % If last argument out1 is specified, then the solution process is % warm-started based on the output of a previous call. 
-function out = forbes_qp(H, q, A, lb, ub, Aeq, beq, lx, ux, opt, out1) +function out = qp(H, q, A, lb, ub, Aeq, beq, lx, ux, opt, out1) t0 = tic(); @@ -21,13 +21,13 @@ if nargin < 1 || isempty(H) error('first parameter H is mandatory'); end - + n = size(H, 2); - + if nargin < 2 || isempty(q) q = zeros(n, 1); end - + if nargin < 3 || isempty(A) flag_ineq = 0; else @@ -36,25 +36,25 @@ error('argument A is incompatible with H and q'); end end - + m = size(A, 1); - + if nargin < 4 || isempty(lb) lb = -inf(m, 1); end - + if any(size(lb) ~= [m, 1]) error('argument lb is incompatible with A'); end - + if nargin < 5 || isempty(ub) ub = +inf(m, 1); end - + if any(size(ub) ~= [m, 1]) error('argument ub is incompatible with A'); end - + if nargin < 6 || isempty(Aeq) flag_eq = 0; else @@ -63,7 +63,7 @@ error('size of Aeq is incompatible with H and q'); end end - + if flag_eq == 1 && (nargin < 7 || isempty(beq) || any(size(beq) ~= [size(Aeq,1), 1])) error('argument beq is incompatible with Aeq'); end @@ -77,7 +77,7 @@ lx = lx*ones(n, 1); end end - + if nargin < 9 || isempty(ux) flag_ux = 0; ux = +inf(n, 1); @@ -87,16 +87,16 @@ ux = ux*ones(n, 1); end end - + if nargin < 10, opt = []; end if nargin < 11, out1 = []; end - + if ~isfield(opt, 'prescale') || isempty(opt.prescale) opt.prescale = true; end - + % Problem setup and solution - + if flag_ineq == 0 && flag_eq == 0 f = quadratic(H, q); g = indBox(lx, ux); @@ -172,9 +172,9 @@ tprep = toc(t0); out_forbes = forbes(f, g, y0, [], constr, opt); end - + ttot = toc(t0); - + out.status = out_forbes.flag; out.msg = out_forbes.message; out.x = out_forbes.x1; @@ -186,4 +186,4 @@ out.preprocess = tprep; out.time = ttot; out.solver = out_forbes; -end \ No newline at end of file +end diff --git a/cones/indCone.m b/cones/indCone.m deleted file mode 100644 index b837da8..0000000 --- a/cones/indCone.m +++ /dev/null @@ -1,16 +0,0 @@ -% This is the function -% -% f(x) = ind_K(x) -% -% where K is a cone in SCS format. 
- -function obj = indCone(K) - K = validate_cone(K); - obj.isConvex = 1; - obj.makeprox = @() @(z, gam) call_indCone_proj(z, K); -end - -function [proj, val] = call_indCone_proj(z, K) - proj = proj_cone(z, K); - val = 0; -end diff --git a/cones/my_proj/proj_cone.m b/cones/my_proj/proj_cone.m deleted file mode 100644 index 0c9247b..0000000 --- a/cones/my_proj/proj_cone.m +++ /dev/null @@ -1,41 +0,0 @@ -function x = proj_cone(x, K) - -% zero cone -x(1:K.zero) = 0; -idx = K.zero; - -% nonnegative orthant -x(idx+1:idx+K.nn) = max(x(idx+1:idx+K.nn), 0); -idx = idx + K.nn; - -% second-order cones -for i=1:length(K.soc) - x(idx+1:idx+K.soc(i)) = proj_soc(x(idx+1:idx+K.soc(i))); - idx = idx + K.soc(i); -end - -% % semidefinite cones -% for i=1:length(K.sdc) -% z(idx+1:idx+getSdConeSize(K.sdc(i))) = proj_sdc(z(idx+1:idx+getSdConeSize(K.sdc(i))),K.sdc(i)); -% idx=idx+getSdConeSize(K.sdc(i)); -% end - -% % exponential cones -% for i = 1:K.exp -% z(idx+1:idx+3) = proj_exp(z(idx+1:idx+3)); -% idx = idx+3; -% end - -% % power cones -% for i = 1:length(K.pow) -% if (K.pow(i) > 0) -% % primal -% z(idx+1:idx+3) = proj_pow(z(idx+1:idx+3), K.ppow(i)); -% else -% % dual -% z(idx+1:idx+3) = z(idx+1:idx+3) + proj_pow(-z(idx+1:idx+3), -K.pow(i)); -% end -% idx = idx+3; -% end - -end \ No newline at end of file diff --git a/cones/my_proj/proj_dual_cone.m b/cones/my_proj/proj_dual_cone.m deleted file mode 100644 index 00e5d35..0000000 --- a/cones/my_proj/proj_dual_cone.m +++ /dev/null @@ -1,24 +0,0 @@ -function x = proj_dual_cone(x, K) - -% y = proj_cone(-x, K); -% x = x + y; % Moreau identity - -% It looks like for some cones (such as second-order cones) project directly -% onto the dual cone when one knows how to do that, rather than using -% Moreau identity, yields more accurate results. - -% dual zero cone (i.e. 
the free cone) -% nothing to do -idx = K.zero; - -% nonnegative orthant (self-dual) -x(idx+1:idx+K.nn) = max(x(idx+1:idx+K.nn), 0); -idx = idx + K.nn; - -% second-order cones (self-dual) -for i=1:length(K.soc) - x(idx+1:idx+K.soc(i)) = proj_soc(x(idx+1:idx+K.soc(i))); - idx = idx + K.soc(i); -end - -end diff --git a/cones/my_proj/proj_soc.m b/cones/my_proj/proj_soc.m deleted file mode 100644 index 4985dad..0000000 --- a/cones/my_proj/proj_soc.m +++ /dev/null @@ -1,25 +0,0 @@ -function z = proj_soc(z) - -if isempty(z) - z=[]; - return; -elseif length(z)==1 - z = max(z,0); - return; -end - -v1 = z(1); -v2 = z(2:end); -normv2 = norm(v2); - -if v1 <= -normv2 - z = zeros(length(z), 1); -elseif v1 >= normv2 - z = z; -else - a = (v1+normv2)/2; - z(1) = a; - z(2:end) = a*(z(2:end)/normv2); -end - -end \ No newline at end of file diff --git a/cones/my_proj/validate_cone.m b/cones/my_proj/validate_cone.m deleted file mode 100644 index 771cdfa..0000000 --- a/cones/my_proj/validate_cone.m +++ /dev/null @@ -1,9 +0,0 @@ -function K = validate_cone(K) - -if (~isfield(K, 'zero')), K.zero = 0; end -if (~isfield(K, 'nn')), K.nn = 0; end -if (~isfield(K, 'soc')), K.soc = []; end -if (~isfield(K, 'sdc')), K.sdc = []; end -if (~isfield(K, 'exp')), K.exp = 0; end - -end diff --git a/cones/nonsmoothCP.m b/cones/nonsmoothCP.m deleted file mode 100644 index 628cde9..0000000 --- a/cones/nonsmoothCP.m +++ /dev/null @@ -1,16 +0,0 @@ -% This is the function -% -% f(x) = ind_S(x) -% -% where S = 0^n \times K \times 0, and K is a cone in SCS format. 
- -function obj = nonsmoothCP(m, n, K) - K = validate_cone(K); - obj.isConvex = 1; - obj.makeprox = @() @(z, gam) call_indCone_proj(z, m, n, K); -end - -function [proj, val] = call_indCone_proj(z, m, n, K) - proj = [zeros(n, 1); proj_cone(z(n+1:n+m), K); 0]; - val = 0; -end diff --git a/cones/normalize_data.m b/cones/normalize_data.m deleted file mode 100644 index 544ac34..0000000 --- a/cones/normalize_data.m +++ /dev/null @@ -1,87 +0,0 @@ -function [data, w] = normalize_data(data, K, scale, w) -[m,n] = size(data.A); - -MIN_SCALE = 1e-3; -MAX_SCALE = 1e3; -minRowScale = MIN_SCALE * sqrt(n); -maxRowScale = MAX_SCALE * sqrt(n); -minColScale = MIN_SCALE * sqrt(m); -maxColScale = MAX_SCALE * sqrt(m); - -D = ones(m,1); -E = ones(n,1); -NN = 1; % NN = 1, other choices bad -for j=1:NN - %% D scale: - Dt = twonorms(data.A(1:K.zero,:)')'; - idx = K.zero; - Dt = [Dt;twonorms(data.A(idx+1:idx+K.nn,:)')']; - idx = idx + K.nn; - for i=1:length(K.soc) - if (K.soc(i) > 0) - nmA = mean(twonorms(data.A(idx+1:idx+K.soc(i),:)')); - Dt = [Dt;nmA*ones(K.soc(i),1)]; - idx = idx + K.soc(i); - end - end -% for i=1:length(K.sdc) -% if (K.sdc(i) > 0) -% nmA = mean(twonorms(data.A(idx+1:idx+getSdConeSize(K.sdc(i)),:)')); -% Dt = [Dt;nmA*ones(getSdConeSize(K.sdc(i)),1)]; -% idx = idx + getSdConeSize(K.sdc(i)); -% end -% end -% for i=1:K.expp -% nmA = mean(twonorms(data.A(idx+1:idx+3,:)')); -% Dt = [Dt;nmA*ones(3,1)]; -% idx = idx + 3; -% end -% for i=1:K.expd -% nmA = mean(twonorms(data.A(idx+1:idx+3,:)')); -% Dt = [Dt;nmA*ones(3,1)]; -% idx = idx + 3; -% end -% for i=1:length(K.pow) -% nmA = mean(twonorms(data.A(idx+1:idx+3,:)')); -% Dt = [Dt;nmA*ones(3,1)]; -% idx = idx + 3; -% end - - Dt(Dt < minRowScale) = 1; - Dt(Dt > maxRowScale) = maxRowScale; - data.A = sparse(diag(1./Dt))*data.A; - - %% E Scale - Et = twonorms(data.A)'; - Et(Et < minColScale) = 1; - Et(Et > maxColScale) = maxColScale; - data.A = data.A*sparse(diag(1./Et)); - - %% - D = D.*Dt; - E = E.*Et; -end - -nmrowA = 
mean(twonorms(data.A')); -nmcolA = mean(twonorms(data.A)); - -data.A = data.A*scale; - -data.b = data.b./D; -sc_b = nmcolA/ max(norm(data.b), MIN_SCALE); -data.b = data.b * sc_b * scale; - -data.c = data.c./E; -sc_c = nmrowA/max(norm(data.c), MIN_SCALE); -data.c = data.c * sc_c * scale; - -w.D = D; -w.E = E; -w.sc_b = sc_b; -w.sc_c = sc_c; - - function twoNorms = twonorms(A) - twoNorms = sqrt(sum(A.^2,1)); - end - -end \ No newline at end of file diff --git a/cones/proj_cone.m b/cones/proj_cone.m deleted file mode 100644 index 0c9247b..0000000 --- a/cones/proj_cone.m +++ /dev/null @@ -1,41 +0,0 @@ -function x = proj_cone(x, K) - -% zero cone -x(1:K.zero) = 0; -idx = K.zero; - -% nonnegative orthant -x(idx+1:idx+K.nn) = max(x(idx+1:idx+K.nn), 0); -idx = idx + K.nn; - -% second-order cones -for i=1:length(K.soc) - x(idx+1:idx+K.soc(i)) = proj_soc(x(idx+1:idx+K.soc(i))); - idx = idx + K.soc(i); -end - -% % semidefinite cones -% for i=1:length(K.sdc) -% z(idx+1:idx+getSdConeSize(K.sdc(i))) = proj_sdc(z(idx+1:idx+getSdConeSize(K.sdc(i))),K.sdc(i)); -% idx=idx+getSdConeSize(K.sdc(i)); -% end - -% % exponential cones -% for i = 1:K.exp -% z(idx+1:idx+3) = proj_exp(z(idx+1:idx+3)); -% idx = idx+3; -% end - -% % power cones -% for i = 1:length(K.pow) -% if (K.pow(i) > 0) -% % primal -% z(idx+1:idx+3) = proj_pow(z(idx+1:idx+3), K.ppow(i)); -% else -% % dual -% z(idx+1:idx+3) = z(idx+1:idx+3) + proj_pow(-z(idx+1:idx+3), -K.pow(i)); -% end -% idx = idx+3; -% end - -end \ No newline at end of file diff --git a/cones/proj_dual_cone.m b/cones/proj_dual_cone.m deleted file mode 100644 index 00e5d35..0000000 --- a/cones/proj_dual_cone.m +++ /dev/null @@ -1,24 +0,0 @@ -function x = proj_dual_cone(x, K) - -% y = proj_cone(-x, K); -% x = x + y; % Moreau identity - -% It looks like for some cones (such as second-order cones) project directly -% onto the dual cone when one knows how to do that, rather than using -% Moreau identity, yields more accurate results. - -% dual zero cone (i.e. 
the free cone) -% nothing to do -idx = K.zero; - -% nonnegative orthant (self-dual) -x(idx+1:idx+K.nn) = max(x(idx+1:idx+K.nn), 0); -idx = idx + K.nn; - -% second-order cones (self-dual) -for i=1:length(K.soc) - x(idx+1:idx+K.soc(i)) = proj_soc(x(idx+1:idx+K.soc(i))); - idx = idx + K.soc(i); -end - -end diff --git a/cones/proj_soc.m b/cones/proj_soc.m deleted file mode 100644 index 4985dad..0000000 --- a/cones/proj_soc.m +++ /dev/null @@ -1,25 +0,0 @@ -function z = proj_soc(z) - -if isempty(z) - z=[]; - return; -elseif length(z)==1 - z = max(z,0); - return; -end - -v1 = z(1); -v2 = z(2:end); -normv2 = norm(v2); - -if v1 <= -normv2 - z = zeros(length(z), 1); -elseif v1 >= normv2 - z = z; -else - a = (v1+normv2)/2; - z(1) = a; - z(2:end) = a*(z(2:end)/normv2); -end - -end \ No newline at end of file diff --git a/cones/scs_proj/proj_cone.m b/cones/scs_proj/proj_cone.m deleted file mode 100644 index 0d51c21..0000000 --- a/cones/scs_proj/proj_cone.m +++ /dev/null @@ -1,3 +0,0 @@ -function z = proj_cone(z,c) - z = z + proj_dual_cone(-z,c); -end \ No newline at end of file diff --git a/cones/scs_proj/proj_dual_cone.m b/cones/scs_proj/proj_dual_cone.m deleted file mode 100644 index 0071f16..0000000 --- a/cones/scs_proj/proj_dual_cone.m +++ /dev/null @@ -1,36 +0,0 @@ -function z = proj_dual_cone(z,K) %% DUAL CONE -% lp cone -z(K.f+1:K.l+K.f) = max(z(K.f+1:K.l+K.f),0); -idx=K.l+K.f; -% SOCs -for i=1:length(K.q) - z(idx+1:idx+K.q(i)) = proj_soc(z(idx+1:idx+K.q(i))); - idx=idx+K.q(i); -end -% SDCs -for i=1:length(K.s) - z(idx+1:idx+getSdConeSize(K.s(i))) = proj_sdp(z(idx+1:idx+getSdConeSize(K.s(i))),K.s(i)); - idx=idx+getSdConeSize(K.s(i)); -end -% EXP cones -for i=1:K.ep - z(idx+1:idx+3) = z(idx+1:idx+3) + proj_exp(-z(idx+1:idx+3)); - idx=idx+3; -end -% dual EXP cones -for i=1:K.ed - z(idx+1:idx+3) = proj_exp(z(idx+1:idx+3)); - idx=idx+3; -end -% power cone -for i=1:length(K.p) - if (K.p(i) > 0) - % primal - z(idx+1:idx+3) = z(idx+1:idx+3) + proj_pow(-z(idx+1:idx+3), K.p(i)); 
- else - % dual - z(idx+1:idx+3) = proj_pow(z(idx+1:idx+3), -K.p(i)); - end - idx=idx+3; -end -end \ No newline at end of file diff --git a/cones/scs_proj/proj_exp.m b/cones/scs_proj/proj_exp.m deleted file mode 100644 index 3bee339..0000000 --- a/cones/scs_proj/proj_exp.m +++ /dev/null @@ -1,98 +0,0 @@ -function [x, iter] = proj_exp(v) -global EXP_CONE_MAX_ITERS; -global EXP_CONE_TOL; - -EXP_CONE_MAX_ITERS = 100; -EXP_CONE_TOL = 1e-8; - -iter = 0; -r = v(1); s = v(2); t = v(3); -% v in cl(Kexp) -if( (s*exp(r./s) -t <= 1e-6 && s > 0) || (r <= 0 && s == 0 && t >= 0) ); - x = v; - return -end - -% -v in Kexp^* -if ( (-r < 0 && r*exp(s./r) + exp(1)*t <= 1e-6) || (-r == 0 && -s >= 0 && -t >= 0) ); - x = zeros(3,1); - return -end - -% special case with analytical solution -if(r < 0 && s < 0); - x = v; - x(2) = 0; - x(3) = max(v(3),0); - return -end - -x = v; -[ub,lb] = getRhoUb(v); -for iter=1:EXP_CONE_MAX_ITERS; - rho = (ub + lb)/2; - [g,x] = calcGrad(v,rho); - if (g > 0) - lb = rho; - else - ub = rho; - end - if (ub - lb < EXP_CONE_TOL) - break - end -end -%x(3) = x(2) * exp(x(1)/x(2)); % makes dual worse -end - -function [ub,lb] = getRhoUb(v) -lb = 0; -rho = 2^(-3); -[g,z] = calcGrad(v,rho); -while (g>0) - lb = rho; - rho = rho*2; - [g,z] = calcGrad(v,rho); -end -ub = rho; -end - -function [g,x] = calcGrad(v,rho) -x = solve_with_rho(v,rho); -if (x(2)==0) - g = x(1); -else - g = (x(1) + x(2)*log(x(2)/x(3))); -end -end - - -function x = solve_with_rho(v,rho) -x = zeros(3,1); -x(3) = newton_exp_onz(rho,v(2),v(3)); -x(2) = (1/rho)*(x(3) - v(3))*x(3); -x(1) = v(1) - rho; -end - - -function z = newton_exp_onz(rho, y_hat, z_hat) -global EXP_CONE_MAX_ITERS; -global EXP_CONE_TOL; - -t = max(-z_hat,EXP_CONE_TOL); -for iter=1:EXP_CONE_MAX_ITERS; - f = (1/rho^2)*t*(t + z_hat) - y_hat/rho + log(t/rho) + 1; - fp = (1/rho^2)*(2*t + z_hat) + 1/t; - - t = t - f/fp; - if (t <= -z_hat) - t = -z_hat; - break; - elseif (t <= 0) - t = 0; - break; - elseif (abs(f)=0 && y>=0 && (x^a) * 
(y^(1-a)) >= abs(z)); - x = in; - return -end - -% -v in K_a^* -u = -in(1); v = -in(2); w = -in(3); -if (u>=0 && v>=0 && (u^a) * (v^(1-a)) >= abs(w) * (a^a) * ((1-a)^(1-a))); - x = zeros(3,1); - return -end - -xh = in(1); -yh = in(2); -zh = in(3); -rh = abs(zh); -r = rh / 2; -for iter=1:CONE_MAX_ITERS; - x = calcX(r, xh, rh, a); - y = calcX(r, yh, rh, 1-a); - - f = calcF(x,y,r,a); - if abs(f) < CONE_TOL - break - end - - dxdr = calcdxdr(x,xh,rh,r,a); - dydr = calcdxdr(y,yh,rh,r,(1-a)); - fp = calcFp(x,y,dxdr,dydr,a); - - r = min(max(r - f/fp,0), rh); -end -z = sign(zh) * r; -x = [x;y;z]; -end - -function x = calcX(r, xh, rh, a) -x = max(0.5 * (xh + sqrt(xh*xh + 4 * a * (rh - r) * r)), 1e-12); -end - -function dx = calcdxdr(x,xh,rh,r, a) -dx = a * (rh - 2*r) / (2*x - xh); -end - -function f = calcF(x,y,r,a) -f = (x^a) * (y^(1-a)) - r; -end - -function fp = calcFp(x,y,dxdr,dydr,a) -fp = (x^a) * (y^(1-a)) * (a * dxdr / x + (1-a) * dydr / y) - 1; -end \ No newline at end of file diff --git a/cones/scs_proj/proj_sdp.m b/cones/scs_proj/proj_sdp.m deleted file mode 100644 index 1340b53..0000000 --- a/cones/scs_proj/proj_sdp.m +++ /dev/null @@ -1,33 +0,0 @@ -function z = proj_sdp(z,n) -if n==0 - return; -elseif n==1 - z = max(z,0); - return; -end - -% expand to full size matrix -b = tril(ones(n)); -b(b == 1) = z; -z = b; -z = (z + z'); -z = z - diag(diag(z)) / 2; - -% rescale so projection works, and matrix norm preserved -% see http://www.seas.ucla.edu/~vandenbe/publications/mlbook.pdf pg 3 -% scale diags by sqrt(2) -z(eye(n) == 1) = z(eye(n) == 1) .* sqrt(2); - -[V,S] = eig(z); -S = diag(S); - -idx = find(S>0); -V = V(:,idx); -S = S(idx); -z = V*diag(S)*V'; - -% scale diags by 1/sqrt(2) -z(eye(n) == 1) = z(eye(n) == 1) ./ sqrt(2); - -z = z(tril(ones(n)) == 1); -end \ No newline at end of file diff --git a/cones/scs_proj/proj_soc.m b/cones/scs_proj/proj_soc.m deleted file mode 100644 index d13f743..0000000 --- a/cones/scs_proj/proj_soc.m +++ /dev/null @@ -1,17 +0,0 @@ 
-function z = proj_soc(tt) -if isempty(tt) - z=[]; - return; -elseif length(tt)==1 - z = max(tt,0); - return; -end -v1=tt(1);v2=tt(2:end); -if norm(v2)<=-v1 - v2=zeros(length(v2),1);v1=0; -elseif norm(v2)> abs(v1) - v2=0.5*(1+v1/norm(v2))*v2; - v1=norm(v2); -end -z=[v1;v2]; -end \ No newline at end of file diff --git a/cones/scs_proj/validate_cone.m b/cones/scs_proj/validate_cone.m deleted file mode 100644 index 670ac45..0000000 --- a/cones/scs_proj/validate_cone.m +++ /dev/null @@ -1,23 +0,0 @@ -function K = validate_cone(K) -if (~isfield(K, 'f')) - K.f = 0; -end -if (~isfield(K, 'l')) - K.l = 0; -end -if (~isfield(K, 'ep')) - K.ep = 0; -end -if (~isfield(K, 'ed')) - K.ed = 0; -end -if (~isfield(K, 'q')) - K.q = []; -end -if (~isfield(K, 's')) - K.s = []; -end -if (~isfield(K, 'p')) - K.p = []; -end -end diff --git a/cones/smoothCP.m b/cones/smoothCP.m deleted file mode 100644 index 1869e40..0000000 --- a/cones/smoothCP.m +++ /dev/null @@ -1,21 +0,0 @@ -% This is the function -% -% f(w) = (b/2)||(x,y)-a||^2 + ind_K*(y) -% -% where K* is the dual cone to K and x has length n. -% Only the conjugate function is implemented here. - -function obj = smoothCP(n, K, a, b) - K = validate_cone(K); - obj.isConvex = 1; - obj.makefconj = @() @(z) call_coneCost_conj(z, n, K, a, b); -end - -function [v, g] = call_coneCost_conj(z, n, K, a, b) - p1 = a(1:n) + z(1:n)/b; - p2 = proj_dual_cone(a(n+1:end) + z(n+1:end)/b, K); - x = z(1:n); - y = proj_dual_cone(z(n+1:end), K); - g = [p1; p2]; - v = z'*g - (b/2)*norm(g-a)^2; -end diff --git a/cones/test_lbfgs.m b/cones/test_lbfgs.m deleted file mode 100644 index d681053..0000000 --- a/cones/test_lbfgs.m +++ /dev/null @@ -1,72 +0,0 @@ -% test for LBFGS routine - -close all; -clear; - -Q = [32.0000 13.1000 -4.9000 -3.0000 6.0000 2.2000 2.6000 3.4000 -1.9000 -7.5000; ... - 13.1000 18.3000 -5.3000 -9.5000 3.0000 2.1000 3.9000 3.0000 -3.6000 -4.4000; ... - -4.9000 -5.3000 7.7000 2.1000 -0.4000 -3.4000 -0.8000 -3.0000 5.3000 5.5000; ... 
- -3.0000 -9.5000 2.1000 20.1000 1.1000 0.8000 -12.4000 -2.5000 5.5000 2.1000; ... - 6.0000 3.0000 -0.4000 1.1000 3.8000 0.6000 0.5000 0.9000 -0.4000 -2.0000; ... - 2.2000 2.1000 -3.4000 0.8000 0.6000 7.8000 2.9000 -1.3000 -4.3000 -5.1000; ... - 2.6000 3.9000 -0.8000 -12.4000 0.5000 2.9000 14.5000 1.7000 -4.9000 1.2000; ... - 3.4000 3.0000 -3.0000 -2.5000 0.9000 -1.3000 1.7000 6.6000 -0.8000 2.7000; ... - -1.9000 -3.6000 5.3000 5.5000 -0.4000 -4.3000 -4.9000 -0.8000 7.9000 5.7000; ... - -7.5000 -4.4000 5.5000 2.1000 -2.0000 -5.1000 1.2000 2.7000 5.7000 16.1000]; - -q = [2.9000 0.8000 1.3000 -1.1000 -0.5000 -0.3000 1.0000 -0.3000 0.7000 -2.1000]'; - -xs = [1.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09; ... - 0.09,1.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08; ... - 0.08,0.09,1.0,0.01,0.02,0.03,0.04,0.05,0.06,0.07; ... - 0.07,0.08,0.09,1.0,0.01,0.02,0.03,0.04,0.05,0.06; ... - 0.06,0.07,0.08,0.09,1.0,0.01,0.02,0.03,0.04,0.05]'; - -dirs_ref = [ ... - -3.476000000000000e+01 -6.861170733797231e-01 -1.621334774299757e-01 -2.008976150849174e-01 -2.317011191832649e-01; - -1.367700000000000e+01 -1.661270665201917e+00 2.870743130038511e-01 2.237224648542354e-01 2.980080835636926e-02; - 2.961000000000000e+00 2.217225828759783e-01 -5.485761164147891e-01 4.811889625788801e-02 -1.267017945785352e-01; - 3.756000000000000e+00 5.615134140894827e-01 9.992734938824949e-02 -6.855884193567087e-01 4.328230970765587e-02; - -5.618000000000001e+00 -1.922426760799171e-01 -1.332550298134261e-02 -2.729265954345345e-02 -2.437461022925742e-01; - -1.571000000000000e+00 -8.961101045874649e-02 5.326252573648003e-02 3.651730112313705e-02 1.349716200511426e-02; - -4.121000000000000e+00 -3.044802963260585e-01 -6.299408068289100e-02 6.325330777317102e-02 -7.155992987801297e-04; - -3.709000000000000e+00 -1.996235459345302e-01 1.525398352758626e-02 2.871281112230844e-02 -3.513449694839536e-03; - 4.010000000000000e-01 1.267604425710271e-01 -7.776943954825602e-02 -1.285590864125103e-01 
-5.603489763638488e-02; - 7.639999999999999e+00 3.360845247013288e-01 -2.335884953507600e-02 -3.204963735369062e-03 5.612114259243499e-02 ]; - -dirs = []; % matrix of directions (to be filled in) - -mem = 3; -col = 0; % last column of Sk, Yk that was filled in -currmem = 0; - -S = zeros(10, 3); -Y = zeros(10, 3); -YS = zeros(1, 3); -H0 = 1; - -for i=1:5 - x = xs(:,i); - grad = Q*x + q; - if i > 1 - Sk = x-x_old; - Yk = grad-grad_old; - YSk = Yk'*Sk; - col = 1+mod(col, mem); - currmem = min(currmem+1, mem); - S(:,col) = Sk; - Y(:,col) = Yk; - YS(col) = YSk; - H0 = YSk/(Yk'*Yk); - end - dir = lbfgs(S, Y, YS, H0, -grad, int32(col), int32(currmem)); - dirs = [dirs, dir]; - x_old = x; - grad_old = grad; -end - -if norm(dirs-dirs_ref, inf) <= 1e-12 - disp('test passed'); -else - disp('test failed'); -end \ No newline at end of file diff --git a/cones/validate_cone.m b/cones/validate_cone.m deleted file mode 100644 index 771cdfa..0000000 --- a/cones/validate_cone.m +++ /dev/null @@ -1,9 +0,0 @@ -function K = validate_cone(K) - -if (~isfield(K, 'zero')), K.zero = 0; end -if (~isfield(K, 'nn')), K.nn = 0; end -if (~isfield(K, 'soc')), K.soc = []; end -if (~isfield(K, 'sdc')), K.sdc = []; end -if (~isfield(K, 'exp')), K.exp = 0; end - -end diff --git a/demos/demo_lasso.m b/demos/demo_lasso.m index 7cbeb30..6af07a3 100755 --- a/demos/demo_lasso.m +++ b/demos/demo_lasso.m @@ -18,9 +18,9 @@ lam_max = norm(A'*b,'inf'); lam = 0.05*lam_max; -f = quadLoss(1, zeros(m,1)); +f = forbes.functions.SqrNormL2(); aff = {A, -b}; -g = l1Norm(lam); +g = forbes.functions.NormL1(lam); x0 = zeros(n, 1); opt.maxit = 10000; opt.tol = 1e-9; diff --git a/demos/demo_matrix_completion.m b/demos/demo_matrix_completion.m index 11e26a6..1caf546 100755 --- a/demos/demo_matrix_completion.m +++ b/demos/demo_matrix_completion.m @@ -19,9 +19,10 @@ lam = 1e0; -f = quadLoss(P(:), B(:)); -g = nuclearNorm(m, n, lam); -x0 = zeros(m*n, 1); +f = forbes.functions.SqrNormL2(P); +aff = {1, -B}; +g = 
forbes.functions.NuclearNorm(lam, 'inexact'); +x0 = zeros(m, n); opt.maxit = 1000; opt.tol = 1e-6; opt.Lf = 1; @@ -31,7 +32,7 @@ opt_fbs = opt; opt_fbs.solver = 'fbs'; opt_fbs.variant = 'fast'; -out = forbes(f, g, x0, [], [], opt_fbs); +out = forbes(f, g, x0, aff, [], opt_fbs); fprintf('\n'); fprintf('iterations : %d\n', out.solver.iterations); fprintf('SVDs : %d\n', out.solver.operations.proxg); @@ -41,7 +42,7 @@ fprintf('\nL-BFGS\n'); opt_lbfgs = opt; opt_lbfgs.method = 'lbfgs'; -out = forbes(f, g, x0, [], [], opt_lbfgs); +out = forbes(f, g, x0, aff, [], opt_lbfgs); fprintf('\n'); fprintf('iterations : %d\n', out.solver.iterations); fprintf('SVDs : %d\n', out.solver.operations.proxg); diff --git a/demos/demo_matrix_completion_noncvx.m b/demos/demo_matrix_completion_noncvx.m index 0750b50..da49ca7 100755 --- a/demos/demo_matrix_completion_noncvx.m +++ b/demos/demo_matrix_completion_noncvx.m @@ -17,9 +17,10 @@ P = sprand(m, n, d) ~= 0; % sampling pattern B = full(M.*P); -f = quadLoss(P(:), B(:)); -g = indRankBall(m, n, 5); -x0 = zeros(m*n, 1); +f = forbes.functions.SqrNormL2(P); +aff = {1, -B}; +g = forbes.functions.IndRankBall(5); +x0 = zeros(m, n); opt.maxit = 1000; opt.tol = 1e-6; opt.Lf = 1; @@ -29,7 +30,7 @@ opt_fbs = opt; opt_fbs.solver = 'fbs'; opt_fbs.variant = 'basic'; -out_fbs = forbes(f, g, x0, [], [], opt_fbs); +out_fbs = forbes(f, g, x0, aff, [], opt_fbs); fprintf('\n'); fprintf('iterations : %d\n', out_fbs.solver.iterations); fprintf('SVDs : %d\n', out_fbs.solver.operations.proxg); @@ -39,7 +40,7 @@ fprintf('\nL-BFGS\n'); opt_lbfgs = opt; opt_lbfgs.method = 'lbfgs'; -out_lbfgs = forbes(f, g, x0, [], [], opt_lbfgs); +out_lbfgs = forbes(f, g, x0, aff, [], opt_lbfgs); fprintf('\n'); fprintf('iterations : %d\n', out_lbfgs.solver.iterations); fprintf('SVDs : %d\n', out_lbfgs.solver.operations.proxg); diff --git a/demos/demo_sparse_huber.m b/demos/demo_sparse_huber.m index 5a60551..f458e29 100755 --- a/demos/demo_sparse_huber.m +++ 
b/demos/demo_sparse_huber.m @@ -17,9 +17,9 @@ % since we know what small/large noise means, would do cross-validation otherwise I guess del = 1; -f = huberLoss(del); +f = forbes.functions.HuberLoss(del); aff = {A, -b}; -g = l1Norm(lam); +g = forbes.functions.NormL1(lam); x0 = zeros(n, 1); opt.maxit = 10000; opt.tol = 1e-6; diff --git a/demos/demo_sparse_logreg.m b/demos/demo_sparse_logreg.m index dc5c395..c1fa1b1 100755 --- a/demos/demo_sparse_logreg.m +++ b/demos/demo_sparse_logreg.m @@ -13,9 +13,9 @@ lam_max = norm(0.5*(A'*b),'inf')/m; lam = 0.1*lam_max; -f = logLoss(1/m); -aff = {diag(sparse(b))*A, zeros(m, 1)}; -g = l1Norm(lam); +f = forbes.functions.LogisticLoss(1/m); +aff = {diag(sparse(b))*A, 0}; +g = forbes.functions.NormL1(lam); x0 = zeros(n, 1); opt.maxit = 10000; opt.tol = 1e-6; diff --git a/demos/demo_svm.m b/demos/demo_svm.m index 1b75fc0..e00c439 100755 --- a/demos/demo_svm.m +++ b/demos/demo_svm.m @@ -24,8 +24,8 @@ fprintf('%d instances, %d features, nnz(A) = %d\n', size(A, 1), size(A, 2), nnz(A)); -f = quadLoss(lam, zeros(n+1, 1)); -g = hingeLoss(1, b); +f = forbes.functions.SqrNormL2(lam); +g = forbes.functions.HingeLoss(1, b); constr = {A, -1, zeros(m, 1)}; y0 = zeros(m, 1); opt.maxit = 10000; diff --git a/forbes.m b/forbes.m index e7e32de..feb3d39 100755 --- a/forbes.m +++ b/forbes.m @@ -1,4 +1,4 @@ -% FORBES Solver for nonsmooth, nonconvex optimization problems. +% FORBES Solvers for nonsmooth, nonconvex optimization problems. % % Composite problems % ------------------ @@ -8,13 +8,13 @@ % We assume that f is continuously differentiable, and that g is closed % and proper. C is a linear mapping and can be a MATLAB matrix, or any % other matrix-like object, which essentially supports matrix-vector -% products, transposition and 'size'. For example operators from the Spot -% toolbox [1] can be used to form C. +% products, transposition and 'size'. For example, the SPOT toolbox +% (http://www.cs.ubc.ca/labs/scl/spot/) can be used to define C. 
% -% out = FORBES(f, g, init, aff, [], opt) solves the problem with the +% out = FORBES(f, g, init, aff, [], sol) solves the problem with the % specified f and g. init is the initial value for x, aff is a cell array -% containing {C, d} (in this order). opt is a structure defining the -% options for the solver (more on this later). +% containing {C, d} (in this order). Last argument sol (optional) is a solver +% object (more on this later). % % Separable problems % ------------------ @@ -28,104 +28,163 @@ % % out = FORBES(f, g, init, [], constr, opt) solves the specified problem. % init is the initial *dual* variable, constr is a cell array defining -% the constraint, i.e., constr = {A, B, b}. the options are specified in -% the opt structure (more on this later). +% the constraint, i.e., constr = {A, B, b}. Last argument sol (optional) is a +% solver object (more on this later). % -% Functions and linear mappings -% ----------------------------- +% General forms of problems +% ------------------------- % -% Functions f and g in the cost can be selected in a library of functions -% available in the "library" directory inside of FORBES directory. Linear -% mappings (C in problem (1) and A, B in problem (2) above) can either be -% MATLAB's matrices or can themselves be picked from a library of -% standard linear operators. +% More general forms of problems are formulated as follows, when multiple +% variables, terms, constraints are involved: % -% For example, to define f and g: +% (1b) minimize f_1(C_11 x_1 + ... + C_1M x_M + d_1) +% + ... +% + f_N(C_N1 x_1 + ... + C_NM x_M + d_N) +% + g_1(x_1) + ... + g_M(x_M) % -% f = logLoss() % logistic loss function -% g = l1Norm() % l1 regularization term +% solved with FORBES(fs, gs, init, aff) where % -% Consider looking into the "library" directory for specific information -% on any of the functions.
+% fs = {f_1, ..., f_N} +% gs = {g_1, ..., g_M} +% aff = {C_11, ..., C_1M, d_1; ...; C_N1, ..., C_NM, d_N} % -% Options -% ------- +% (2b) minimize f_1(x_1) + ... + f_N(x_N) +% + g_1(z_1) + ... + g_M(z_M) +% subject to A_11 x_1 + ... + A_1N x_N + B_1 z_1 = b_1 +% [...] +% A_M1 x_1 + ... + A_MN x_N + B_M z_M = b_M % -% In opt the user can specify the behaviour of the algorithm to be used. -% The following options can be set: +% solved with FORBES(fs, gs, init, [], coeff) where % -% opt.tol: Tolerance on the optimality condition. +% fs = {f_1, ..., f_N} +% gs = {g_1, ..., g_M} +% coeff = {A_11, ..., A_1N, B_1, b_1; ...; A_M1, ..., A_MN, B_M, b_M} % -% opt.maxit: Maximum number of iterations. +% Functions and linear mappings +% ----------------------------- % -% opt.solver: Internal solver to use. Can select between: -% * 'minfbe' (only for problems where g is convex) -% * 'zerofpr' (default, can handle also nonconvex g) +% TODO % -% opt.method: Algorithm to use. Can select between: -% * 'bfgs' (BFGS quasi-Newton method) -% * 'lbfgs' (default, limited memory BFGS). +% Solvers +% ------- % -% opt.linesearch: Line search strategy to use. Can select between: -% * 'backtracking' (default, simple backtracking), -% * 'backtracking-armijo' (backtracking satisfying Armijo condition), -% * 'backtracking-nm' (nonmonotone backtracking), -% * 'lemarechal' (line search for the Wolfe conditions). +% TODO % % References % ---------- % -% [1] Spot linear operators toolbox: http://www.cs.ubc.ca/labs/scl/spot/ +% TODO % -% Authors: Lorenzo Stella (lorenzo.stella -at- imtlucca.it) -% Panagiotis Patrinos (panos.patrinos -at- esat.kuleuven.be) +% Authors: Lorenzo Stella, Panagiotis Patrinos -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES.
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . +function out = forbes(fs, gs, init, aff, constr, solver) + + % Fill-in defaults + + if nargin < 4, aff = {}; end + if nargin < 5, constr = {}; end + if nargin < 6, solver = forbes.solvers.NAMA(); end + + % Convert single functions to cell arrays + + if ~iscell(fs), fs = {fs}; end + if ~iscell(gs), gs = {gs}; end + + % Detect problem type (primal or dual) + + ptype = 0; + if ~isempty(aff) + ptype = ptype + 1; + end + if ~isempty(constr) + ptype = ptype + 2; + end + + % Prepare problem + + switch ptype -function out = forbes(fs, gs, init, aff, constr, opt) + case 0 - t0 = tic(); + error('must have either aff or constr'); - if nargin < 3, error('you must provide at least 3 arguments'); end - if nargin < 4, aff = []; end - if nargin < 5, constr = []; end - if nargin < 6, opt = []; end + case 1 % Solve primal - [prob, id] = Process_Problem(fs, gs, init, aff, constr); - opt = Process_Options(opt); - lsopt = Process_LineSearchOptions(opt); + n_vars = size(aff, 2)-1; + [idx_q, idx_n] = split_quadratic(fs); - preprocess = toc(t0); + [f_q, C_q] = aggregate_terms(fs, aff, idx_q); + [f_n, C_n] = aggregate_terms(fs, aff, idx_n); - out_solver = opt.solverfun(prob, opt, lsopt); + dims_g = {}; + for i = 1:n_vars + dims_g{end+1} = size(aff{1, i}, 2); + end + if n_vars > 1 + g = forbes.functions.SeparableSum(gs, dims_g); + elseif n_vars == 1 + g = gs{1}; + else + 
error('must have at least one nonsmooth term g'); + end - out.message = out_solver.message; - out.flag = out_solver.flag; - if id == 1 - out.x = out_solver.x; + case 2 % Solve dual + + error('dual solvers not implemented'); + + case 3 + + error('cannot have both aff and constr'); + + otherwise + + error('unknown problem type'); + + end + + % Call solver + + solver.run(f_q, C_q, f_n, C_n, g, init); + + % TODO: should we produce the output differently? probably yes + % For example, in case we solve the dual, we want the primal solution + + out = solver; + +end + +function [idx_q, idx_nq] = split_quadratic(fs) + idx_q = []; + idx_nq = []; + for i = 1:length(fs) + if fs{i}.is_quadratic() + idx_q = [idx_q, i]; + else + idx_nq = [idx_nq, i]; + end + end +end + +function [f, C] = aggregate_terms(fs, aff, idx) + dims = {}; + C = []; + d = []; + for i = idx + dims{end+1} = size(aff{i, 1}, 1); + C = [C; [aff{i, 1:end-1}]]; + d = [d; aff{i, end}]; + end + if length(idx) > 1 + s = forbes.functions.SeparableSum(fs(idx), dims); + elseif length(idx) == 1 + s = fs{idx}; + else + C = 1.0; + s = forbes.functions.Zero(); + end + if norm(d, 'fro') > 0 + f = forbes.functions.ScaleTranslate(s, 1.0, d); else - [out.x1, out.x2, out.z] = prob.Get_DualPoints(out_solver.x, out_solver.gam); - out.y = out_solver.x; + f = s; end - out.solver = out_solver; - out.prob = prob; - out.opt = opt; - out.lsopt = lsopt; - out.preprocess = preprocess; - out.time = toc(t0); end diff --git a/forbes_alm.m b/forbes_alm.m deleted file mode 100644 index 4bdd377..0000000 --- a/forbes_alm.m +++ /dev/null @@ -1,145 +0,0 @@ -%FORBES_ALM Solver for nonsmooth optimization problems. -% -% FORBES_ALM(f, g, h, F, init, opt, inn_init, inn_opt) solves problems of the form -% -% minimize f(x) + g(x) + h(F(x)) -% -% We assume that f is smooth with Lipschitz continuous gradient, that -% g is a closed, proper function and that h is a closed, proper, convex -% function. 
Both g and h are assumed to have an easily computable proximal -% mapping. F is a linear mapping. -% -% Parameter init is the starting dual point, opt is a structure -% containing options for the augmented Lagrangian method, inn_opt is a -% structure containing options for the inner solver. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function out = forbes_alm(f, g, h, F, init, opt, inn_init, inn_opt) - -if nargin < 5 || isempty(init), init = zeros(F.m(1), F.m(2)); end -if nargin < 6, opt = []; end -if nargin < 7 || isempty(inn_init), inn_init = zeros(F.n(1),F.n(2)); end -if nargin < 8, inn_opt = []; end - -% fill-in missing options with defaults -opt = default_opt(opt); -inn_opt = default_inner_opt(inn_opt); - -if opt.display >= 2 - fprintf('%6s%11s%11s%11s%11s\n', 'iter', 'res', 'penalty', 'inner tol', 'inner it'); -end - -y = init; -x = inn_init; -res = zeros(1,opt.maxit); -callF = F.makeop(); - -tot_inn_it = 0; -tot_ops = OpsInit(); - -% Algorithm 17.4 Nocedal -r = 10; -inn_opt.tol = 1/r; -eta = 0.1258925; - -D = diagOp(F.n); -inn_linop = stackOp({F, D}); - -if ~isfield(opt, 'sqOpNorm') - linop_op = inn_linop.makeop(); - linop_adj = inn_linop.makeadj(); - linop_toiter = @(x) vec(linop_adj(linop_op(reshape(x, F.n(1), F.n(2))))); - eigsOpt.issym = 1; - eigsOpt.tol = 1e-3; - sqnorm_linop = eigs(linop_toiter, prod(F.n), 1, 'LM', eigsOpt); -else - sqnorm_linop = opt.sqOpNorm; -end - -for it = 1:opt.maxit - - % define smooth term of the inner augmented Lagrangian subproblem - hgamma = moreauEnvelope(h, 1/r); - inn_f = separableSum({hgamma, f}, {F.m, F.n}); - inn_aff = {inn_linop, [y(:)/r; zeros(prod(D.n), 1)]}; - if isfield(f, 'L'), inn_opt.Lf = f.L + sqnorm_linop*r; end - - % solve subproblem (warm start) - inn_out = forbes(inn_f, g, x, inn_aff, [], inn_opt); - x = inn_out.x; - tot_inn_it = tot_inn_it + inn_out.iterations; - tot_ops = OpsSum(tot_ops, inn_out.operations); - - % compute next dual iterate - callhgamma = hgamma.makef(); - [~, y1] = callhgamma(callF(x) + y/r); - res(1,it) = norm(y1-y)/r; - - % display info - if opt.display == 1 - fprintf('.'); - elseif opt.display >= 2 - fprintf('%6d %7.4e %7.4e %7.4e %10d\n', it, res(1,it), r, inn_opt.tol, inn_out.iterations); - end - - if res(1,it) <= eta - if res(1,it) <= opt.tol && inn_out.residual(end) <= opt.tol - break - else - y = y1; - eta 
= eta/r^0.9; - inn_opt.tol = inn_opt.tol/10; - end - else - r = 10*r; - eta = 1/r^0.1; - inn_opt.tol = 1/r; - end - - % stopping criterion - if res(1, it) <= opt.tol - break; - end - -end - -if opt.display == 1 - fprintf('\n'); -end - -out.x = inn_out.x; -out.y = y; -out.iterations = it; -out.inner_iterations = tot_inn_it; -out.operations = tot_ops; - -function opt = default_opt(opt) - -if ~isfield(opt, 'display'), opt.display = 1; end -if ~isfield(opt, 'maxit'), opt.maxit = 100; end -if ~isfield(opt, 'tol'), opt.tol = 1e-6; end - -function opt = default_inner_opt(opt) - -if ~isfield(opt, 'display'), opt.display = 0; end -if ~isfield(opt, 'tol'), opt.tol = 1e-6; end -if ~isfield(opt, 'solver'), opt.solver = 'zerofpr'; end -if ~isfield(opt, 'method'), opt.method = 'lbfgs'; end -% make sure the Lipschitz constant is not set (it cannot be known) -if isfield(opt, 'Lf'), opt = rmfield(opt, 'Lf'); end diff --git a/forbes_compare.m b/forbes_compare.m deleted file mode 100755 index 402bb49..0000000 --- a/forbes_compare.m +++ /dev/null @@ -1,8 +0,0 @@ -function outs = forbes_compare(fs, gs, init, aff, constr, opts) - outs = {}; - for i = 1:length(opts) - out = forbes(fs, gs, init, aff, constr, opts{i}); -% disp(out); - outs{end+1} = out; - end -end diff --git a/forbes_cp.m b/forbes_cp.m deleted file mode 100644 index da1d615..0000000 --- a/forbes_cp.m +++ /dev/null @@ -1,105 +0,0 @@ -function [x, y, s, info] = forbes_cp(data, K, opt) - -info.name = 'FBCS matlab (ForBES wrapper)'; -info.version = '0.1'; - -if nargin < 2 - x = []; y = []; s = []; - return; -end - -% some constants, flags etc. 
-normalize = 1; -scale = 1; -% verb_gap = 200; -% lbfgs_mem = 20; -% flag_small_stepsize = 0; -% eta = 0.00; % for nonmonotone line-search (eta = 0 makes it monotone, elsewhere we have eta = 0.85) -% max_iters_ls = 32; - -% t0 = tic(); - -K = validate_cone(K); - -n = length(data.c); -m = length(data.b); - -unscaled_b = data.b; -unscaled_c = data.c; - -unscaled_nm_b = norm(unscaled_b); -unscaled_nm_c = norm(unscaled_c); - -data_orig = data; - -work = struct(); -if (normalize) - [data, work] = normalize_data(data, K, scale, work); - D = work.D; - E = work.E; - sc_b = work.sc_b; - sc_c = work.sc_c; -else - scale = 1; - D = ones(m,1); - E = ones(n,1); - sc_b = 1; - sc_c = 1; -end - -% unwrap problem data -A = data.A; -b = data.b; -c = data.c; - -% determine gamma -L = [sparse(n,n), A'; -A, sparse(m,m); -c', -b']; -eigsOpts.issym = 1; -eigsOpts.tol = 1e-3; -sqNormL = eigs(@(x)L*(L'*x), n+m+1, 1, 'LM', eigsOpts); - -f = smoothCP(n, K, zeros(m+n,1), 1); -g = nonsmoothCP(m, n, K); -d = [-c; -b; 0]; -opt.Lf = sqNormL; - -t0 = tic(); -out_forbes = forbes(f, g, zeros(n+m+1,1), [], {L, -1, d}, opt); -ttot = toc(t0); - -% temporary -if opt.display > 0 - fprintf('Solver used : %s\n', out_forbes.name); - fprintf('Iterations : %d\n', out_forbes.iterations); - fprintf('Stepsize : %.2e (1/Lip = %.2e)\n', out_forbes.gam, 1/sqNormL); - fprintf('Solve time : %.2f\n', ttot); -end - -x = out_forbes.x2(1:n); -y = out_forbes.x2(n+1:end); -s = out_forbes.z(n+1:n+m); - -if (normalize) - y = y ./ (D * sc_c); - x = x ./ (E * sc_b); - s = s .* (D / (sc_b * scale)); -end - -A_orig = data_orig.A; -b_orig = data_orig.b; -c_orig = data_orig.c; - -unscaled_dres = A_orig'*y + c_orig; -unscaled_pres = b_orig - A_orig*x - s; -unscaled_cx = c_orig'*x; -unscaled_by = b_orig'*y; -unscaled_gap = -(unscaled_by + unscaled_cx); - -info.resPri = norm(unscaled_pres)/(1 + unscaled_nm_b); -info.resDual = norm(unscaled_dres)/(1 + unscaled_nm_c); -info.relGap = abs(unscaled_gap)/(1 + abs(unscaled_cx) + 
abs(unscaled_by)); - -info.iter = out_forbes.iterations; -info.status = 'solved'; - -end diff --git a/forbes_hankel.m b/forbes_hankel.m deleted file mode 100644 index 0ec2ef2..0000000 --- a/forbes_hankel.m +++ /dev/null @@ -1,80 +0,0 @@ -% FORBES_HANKEL -% -% FORBES_HANKEL(U, bx, r, mu, opt) solves -% -% minimize (1/2)||x-b||^2 + mu*sum(svd(H(x)U) -% -% or equivalently -% -% minimize (1/2)||x-b||^2 + mu*sum(svd(z)) -% subject to H(x)U - z = 0 -% -% Here, H(x) is a block Hankel matrix formed -% from x, and b and U come from measurements: -% -% U is a matrix such that U'U=I -% x is p by N+1 matrix -% U is N-r+1 by n matrix -% H(x) is p(r+1) by N-r+1 Hankel matrix -% - -function out = forbes_hankel(U, b, r, mu, tol, freq, opt, out1) - - p = size(b,1); - N = size(b,2)-1; - m = (r+1)*p; - n = size(U,2); - - f = quadLoss(1, b); - g = nuclearNorm(m, n, mu); - A = opHankel(N, r, p, U); - - if nargin < 8 || isempty(out1) - y0 = zeros(m,n); - else - y0 = out1.y; - end - - opt.Lf = min(r+1,N-r+1); % squared norm of A - opt.term = @(prob, it, gam, cache_0, cache_x, ops) terminate_hankel(it, freq, cache_x, b, mu, tol); - - out = forbes(f, g, y0, [], {A, -1, zeros(m, n)}, opt); - - G = -A*out.x1; - nmEY = norm(out.x1-b,'fro')^2/2; - out.pobj = nmEY + mu*sum(svd(G)); - -end - -function flag = terminate_hankel(it, freq, cache, b, mu, tol) - persistent pobj_best dobj_best; - - if it == 1 - pobj_best = +inf; - dobj_best = -inf; - end - - flag = false; - - if it <= 1 || mod(it, freq) ~= 0 - return; - end - - x = cache.gradf1res1x; - G = cache.gradf1x; - nmEY = norm(x-b,'fro')^2/2; - pobj = nmEY + mu*sum(svd(G)); - if pobj < pobj_best - pobj_best = pobj; - end - dobj = -cache.FBE; - if dobj > dobj_best - dobj_best = dobj; - end - gap_rel = abs(pobj_best-dobj_best)/max(abs(dobj_best),1); - - if gap_rel < tol - flag = true; - end - -end diff --git a/forbes_setup.m b/forbes_setup.m index 63fe3e8..9ffcb3d 100755 --- a/forbes_setup.m +++ b/forbes_setup.m @@ -1,26 +1,21 @@ % Add ForBES 
directory to MATLAB's path + forbes_path = fileparts(mfilename('fullpath')); -library_path = fullfile(forbes_path, 'library'); -private_path = fullfile(forbes_path, 'private'); -utils_path = fullfile(forbes_path, 'utils'); -cones_path = fullfile(forbes_path, 'cones'); disp(['Adding ForBES directory to MATLAB path: ', forbes_path]); addpath(forbes_path); -disp(['Adding ForBES library to MATLAB path: ', library_path]); -addpath(library_path); -disp(['Adding ForBES utils to MATLAB path: ', library_path]); -addpath(utils_path); -addpath(cones_path); savepath; % Compile necessary C source files -LBFGS_path = fullfile(forbes_path, 'private', 'lbfgs.c'); -Riccati_path = fullfile(forbes_path, 'library', 'RiccatiSolve.c'); -error_msg = 'The C compiler could not succesfully compile '; -if mex('-outdir', private_path, LBFGS_path), error([error_msg, LBFGS_path]); end -if mex('-outdir', library_path, Riccati_path), error([error_msg, Riccati_path]); end + +utils_path = fullfile(forbes_path, '+forbes', '+utils', filesep); +mex('-outdir', utils_path, [utils_path, 'lbfgs_mex.c'], [utils_path, 'libLBFGS.c']); +mex('-outdir', utils_path, [utils_path, 'RiccatiSolve.c']); + +% Success + disp('ForBES was succesfully configured and installed'); disp('Type ''help forbes'' to access the help file'); % Clear variables -clear forbes_path library_path cones_path private_path LBFGS_path Riccati_path error_msg; + +clear forbes_path utils_path; diff --git a/forbes_svm.m b/forbes_svm.m deleted file mode 100644 index dc70b7e..0000000 --- a/forbes_svm.m +++ /dev/null @@ -1,65 +0,0 @@ -% FORBES_SVM -% -% FORBES_SVM(A, b, lam, opt) solves the problem -% -% minimize (lam/2)||x||^2 + sum(max(0, b.*(1-A*x)) -% - -function out = forbes_svm(A, b, lam0, opt) - -% % if options are not given -% if nargin < 4, opt = struct(); end -% -% % set some defualt options -% if ~isfield(opt, 'term'), opt.term = []; end -% term0 = opt.term; -% if ~isfield(opt, 'tol'), opt.tol = []; end -% tol0 = opt.tol; -% if 
~isfield(opt, 'display'), opt.display = 0; end -% -% % compute Lipschitz constant -% [m, n] = size(A); -% eigsOpt.issym = 1; -% eigsOpt.tol = 1e-3; -% funHessian = @(x) A'*(A*x); -% Lf = eigs(funHessian, n, 1, 'LM', eigsOpt); -% opt.Lf = Lf; -% -% % to warm start or not to warm start? -% if ~isfield(opt, 'continuation') || isempty(opt.continuation), opt.continuation = 1; end -% if opt.continuation -% lam_max = norm(A'*b,'inf'); -% lam = lam_max; -% else -% lam = lam0; -% end -% -% % -% f = quadLoss(1, zeros(m, 1)); -% init = zeros(n, 1); -% -% for i_cont = 1:100 -% -% % % this is the continuation scheme of SpaRSA -% % btilde = b-A*init; -% % lam = max(0.5*norm(A'*btilde,'inf'), lam0); -% -% % this is the simpler continuation scheme -% lam = max(0.5*lam, lam0); -% -% g = l1Norm(lam); -% if lam <= lam0 -% opt.term = term0; -% opt.tol = tol0; -% else -% opt.term = []; -% opt.tol = 1e-3*lam; -% end -% out = forbes(f, g, init, {A, -b}, [], opt); -% if lam <= lam0 -% break; -% end -% init = out.x; -% end - -end diff --git a/forbes_test.m b/forbes_test.m index a83320c..7e50970 100644 --- a/forbes_test.m +++ b/forbes_test.m @@ -1,36 +1,5 @@ -function forbes_test() - -% add path -addpath(fullfile(fileparts(mfilename('fullpath')), 'tests')); - -fprintf('* testing library functions:\n'); - -fprintf('%40s', 'sumOp... '); tic(); test_sumOp; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'stackOp... '); tic(); test_stackOp; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'separableSum... '); tic(); test_separableSum; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'quadratic... '); tic(); test_quadratic; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'lqrCost... '); tic(); test_lqrCost; fprintf('OK (%5.2f s)\n', toc()); - -fprintf('* testing solver utilities:\n'); - -fprintf('%40s', 'MakeProblem... '); tic(); test_MakeProblem; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'CheckGamma... 
'); tic(); test_CheckGamma; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'LineFBE... '); tic(); test_LineFBE; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'SegmentFBE... '); tic(); test_SegmentFBE; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'FBE inequalities... '); tic(); test_inequalities1; fprintf('OK (%5.2f s)\n', toc()); -fprintf('%40s', 'FBE inequalities... '); tic(); test_inequalities2; fprintf('OK (%5.2f s)\n', toc()); - -fprintf('* testing composite problems:\n'); - -fprintf('%36s', 'SolveLasso_small'); tic(); test_SolveLasso_small; fprintf(' OK (%5.2f s)\n', toc()); -fprintf('%36s', 'SolveLasso_random'); tic(); test_SolveLasso_random; fprintf(' OK (%5.2f s)\n', toc()); -fprintf('%36s', 'SolveSparseLogReg_small'); tic(); test_SolveSparseLogReg_small ; fprintf(' OK (%5.2f s)\n', toc()); -fprintf('%36s', 'SolveNuclearNormMC_random'); tic(); test_SolveNuclearNormMC_random; fprintf(' OK (%5.2f s)\n', toc()); -% fprintf('%36s', 'SolveRankConstrMC_random'); tic(); test_SolveRankConstrMC_random; fprintf(' OK (%5.2f s)\n', toc()); - -fprintf('* testing separable problems:\n'); - -fprintf('%36s', 'SolveSVM_random'); tic(); test_SolveSVM_random; fprintf(' OK (%5.2f s)\n', toc()); -fprintf('%36s', 'SolveQP_random'); tic(); test_SolveQP_random; fprintf(' OK (%5.2f s)\n', toc()); - -rmpath(fullfile(fileparts(mfilename('fullpath')), 'tests')); +result = runtests('forbes.tests.test_lasso_small'); +result = runtests('forbes.tests.test_lasso_random'); +result = runtests('forbes.tests.test_sparse_logreg'); +% result = runtests('forbes.tests.test_nn_matcomp_small'); +result = runtests('forbes.tests.test_nn_matcomp_random'); diff --git a/forbes_uninstall.m b/forbes_uninstall.m index 0917e6b..1cfd33e 100644 --- a/forbes_uninstall.m +++ b/forbes_uninstall.m @@ -1,14 +1,12 @@ % Remove ForBES directory from MATLAB's path + forbes_path = fileparts(mfilename('fullpath')); -library_path = fullfile(forbes_path, 'library'); -cones_path = fullfile(forbes_path, 
'cones'); -private_path = fullfile(forbes_path, 'private'); display(['Removing ForBES directory from MATLAB path: ', forbes_path]); rmpath(forbes_path); -display(['Removing ForBES library from MATLAB path: ', library_path]); -rmpath(library_path); savepath; display('ForBES was succesfully removed from MATLAB path'); -clear forbes_path library_path cones_path private_path +% Clear variables + +clear forbes_path; diff --git a/library/conjugate.m b/library/conjugate.m deleted file mode 100644 index f8e11a8..0000000 --- a/library/conjugate.m +++ /dev/null @@ -1,30 +0,0 @@ -function obj = conjugate(f) - if nargin < 1 || isempty(f) - obj = []; - return; - end - obj.isConvex = true; - obj.isQuadratic = f.isConjQuadratic; - obj.isConjQuadratic = f.isQuadratic; - obj.hasHessian = 0; - if isfield(f, 'makefconj') - obj.makef = f.makefconj; - end - if isfield(f, 'makef') - obj.makefconj = f.makef; - end - if isfield(f, 'makeprox') - obj.makeprox = @() make_prox_conj(f); - end -end - -function fun = make_prox_conj(f) - proxf = f.makeprox(); - fun = @(x, gam) call_prox_conj(x, gam, proxf); -end - -function [p, v] = call_prox_conj(y, gam, prox) - [z, v] = prox(y/gam, 1/gam); - p = y - gam*z; - v = y(:)'*z(:) - gam*(z(:)'*z(:)) - v; -end diff --git a/library/diagOp.m b/library/diagOp.m deleted file mode 100755 index 56da4b4..0000000 --- a/library/diagOp.m +++ /dev/null @@ -1,10 +0,0 @@ -function obj = diagOp(n, w) - if nargin < 1, error('you should provide the dimension of the space'); end - if numel(n) == 1, n = [n, 1]; end - if nargin < 2, w = 1; end - if numel(w) ~= 1 && any(size(w) ~= n), error('size of w must be 1 or n'); end - obj.m = n; - obj.n = n; - obj.makeop = @() @(x) w.*x; - obj.makeadj = @() @(y) w.*y; -end diff --git a/library/dist2Ball_l2.m b/library/dist2Ball_l2.m deleted file mode 100755 index 5b2dad0..0000000 --- a/library/dist2Ball_l2.m +++ /dev/null @@ -1,55 +0,0 @@ -%DIST2BALL_L2 Squared distance from a Euclidean ball of given center and radius -% -% 
DIST2BALL_L2(rho, c, w) builds the function -% -% f(x) = (w/2)*dist^2(x,B) where B is the ball ||x-c|| <= rho -% -% If c is not provided, c = 0. If also rho is not provided, rho = 1. -% Default weight is w = 1. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = dist2Ball_l2(rho, c, weight) - if nargin < 1 || isempty(rho) - rho = 1; - end - if nargin < 2 || isempty(c) - c = 0; - end - if nargin < 3 || isempty(weight) - weight = 1; - end - if ~isscalar(weight) || weight <= 0 - error('third argument (weight) must be a positive scalar'); - end - obj.makef = @() @(x) call_dist2Ball_l2_f(x, rho, c, weight); - obj.L = weight; -end - -function [val, grad] = call_dist2Ball_l2_f(x, rho, c, weight) - xc = x-c; - nxc = norm(xc); - if nxc <= rho - proj = x; - else - proj = c + (rho/nxc)*xc; - end - diff = x-proj; - val = (0.5*w)*(diff'*diff); - grad = weight*diff; -end diff --git a/library/dist2Box.m b/library/dist2Box.m deleted file mode 100755 index 7fc7854..0000000 --- a/library/dist2Box.m +++ /dev/null @@ -1,41 +0,0 @@ -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. 
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = dist2Box(lb, ub, weights) - % Function value and gradient of (w/2)*dist^2(x,C) where C is the box [lb,ub] - if nargin < 3 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(ub) - ub = +inf; - end - if nargin < 1 || isempty(lb) - lb = -inf; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makef = @() @(x) call_dist2Box_f(x, lb ,ub, weights); - obj.L = max(weights); -end - -function [val, grad] = call_dist2Box_f(x,lb ,ub, weights) - proj = max(min(x,ub),lb); - diff = x - proj; - grad = weights.*diff; - val = 0.5*sum(weights.*(diff.*diff)); -end diff --git a/library/dist2Neg.m b/library/dist2Neg.m deleted file mode 100755 index 47c335c..0000000 --- a/library/dist2Neg.m +++ /dev/null @@ -1,38 +0,0 @@ -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = dist2Neg(weights,ub) - % Function value and gradient of (w/2)*dist^2(x,C) where C is the box [-infty,ub] - if nargin < 1 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(ub) - ub = 0; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makef = @() @(x) call_dist2Neg_f(x, ub, weights); - obj.L = max(weights); -end - -function [val, grad] = call_dist2Neg_f(x, ub, weights) - proj = min(x,ub); - diff = x - proj; - grad = weights.*diff; - val = 0.5*sum(weights.*(diff.*diff)); -end diff --git a/library/dist2Pos.m b/library/dist2Pos.m deleted file mode 100755 index d355464..0000000 --- a/library/dist2Pos.m +++ /dev/null @@ -1,38 +0,0 @@ -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = dist2Pos(weights,lb) - % Function value and gradient of (w/2)*dist^2(x,C) where C is the box [lb,+infty] - if nargin < 1 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(lb) - lb = 0; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makef = @() @(x) call_dist2Pos_f(x, lb, weights); - obj.L = max(weights); -end - -function [val, grad] = call_dist2Pos_f(x, lb, weights) - proj = max(x,lb); - diff = x - proj; - grad = weights.*diff; - val = 0.5*sum(weights.*(diff.*diff)); -end diff --git a/library/distBall_l2.m b/library/distBall_l2.m deleted file mode 100755 index 308a79d..0000000 --- a/library/distBall_l2.m +++ /dev/null @@ -1,56 +0,0 @@ -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = distBall_l2(rho,c,weight) - % Proximal mapping of w*dist(x,C) where C is the l2 ball ||x-c||<=rho - if nargin<3 || isempty(weight) - weight = 1; - end - if nargin<2 || isempty(c) - c = 0; - end - if nargin<1 || isempty(rho) - rho = 1; - end - - obj.makeprox = @() @(x, gam) call_distBall_l2_prox(x, gam, rho, c, weight); -end - -function [prox, val] = call_distBall_l2_prox(x, gam, rho, c, weight) - gam = weight*gam; - xc = x-c; - nxc = norm(xc); - if nxc <= rho - prox = x; - val = 0; - else - scale = rho/nxc; - if nxc > gam/(1-scale); - prox = x-(gam/nxc)*xc; - xc = prox-c; - nxc = norm(xc); - if nxc <= rho - val = 0; - else - val = weight*(1-rho/nxc)*nxc; - end - else - prox = c + scale*xc; - val = 0; - end - end -end diff --git a/library/distBox.m b/library/distBox.m deleted file mode 100755 index 0ab0d9e..0000000 --- a/library/distBox.m +++ /dev/null @@ -1,62 +0,0 @@ -%DISTBOX Distance from a box -% -% DISTBOX(l, u, w) builds the function -% -% g(x) = sum(w_i*(x_i - max{l_i, min{u_i, x_i}})) -% -% Boundaries l_i and u_i can take the value -inf and +inf respectively, -% in which case the corresponding segment is lower or upper unbounded. -% -% Weights w_i are assumed to be 1 if not provided. They can take the -% value +inf, in which case the distance from the corresponding segment -% [l_i,u_i] becomes the indicator function. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. 
-% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = distBox(lb, ub, weights) - if nargin < 3 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(ub) - ub = +inf; - end - if nargin < 1 || isempty(lb) - lb = -inf; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makeprox = @() @(x, gam) call_distBox_prox(x, gam, lb, ub, weights); - obj.isQuadratic = false; - obj.isConjQuadratic = false; - obj.isConvex = true; -end - -function [prox, val] = call_distBox_prox(x, gam, lb, ub, weights) - mu = gam*weights; - prox = max(x-ub-mu, 0) - max(lb-x-mu, 0) + min(max(x, lb), ub); - if nargout > 1 - proj = max(lb, min(ub, prox)); - if isscalar(weights) - val = sum(weights*abs(prox-proj)); - else - finw = ~isinf(weights); - val = sum(weights(finw).*abs(prox(finw)-proj(finw))); - end - end -end diff --git a/library/distNeg.m b/library/distNeg.m deleted file mode 100755 index 4cd5a23..0000000 --- a/library/distNeg.m +++ /dev/null @@ -1,51 +0,0 @@ -%DISTNEG Distance from a box -% -% DISTNEG(w, u) builds the function -% -% g(x) = sum(w_i*(x_i - min{u_i, x_i})) -% -% Boundaries u_i can take the value +inf, in which case the corresponding -% halfline is upper unbounded. -% -% Weights w_i are assumed to be 1 if not provided. They can take the -% value +inf, in which case the distance from the corresponding halfline -% [-inf,u_i] becomes the indicator function. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. 
-% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = distNeg(weights, ub) - if nargin < 1 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(ub) - ub = 0; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makeprox = @() @(x, gam) call_distNeg_prox(x, gam, ub, weights); -end - -function [prox, val] = call_distNeg_prox(x, gam, ub, weights) - mu = gam*weights; - prox = min(max(x-mu,ub),x); - if nargout > 1 - finw = ~isinf(weights); - val = sum(weights(finw).*max(prox(finw)-ub(finw),0)); - end -end diff --git a/library/distPos.m b/library/distPos.m deleted file mode 100755 index 0aad1df..0000000 --- a/library/distPos.m +++ /dev/null @@ -1,51 +0,0 @@ -%DISTPOS Distance from a box -% -% DISTPOS(w, l) builds the function -% -% g(x) = sum(w_i*(x_i - max{l_i, x_i})) -% -% Boundaries l_i can take the value -inf, in which case the corresponding -% halfline is lower unbounded. -% -% Weights w_i are assumed to be 1 if not provided. They can take the -% value +inf, in which case the distance from the corresponding halfline -% [l_i,+inf] becomes the indicator function. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = distPos(weights, lb) - if nargin < 1 || isempty(weights) - weights = 1; - end - if nargin < 2 || isempty(lb) - lb = 0; - end - if any(weights < 0) - error('all weights must be nonnegative'); - end - obj.makeprox = @() @(x, gam) call_distPos_prox(x, gam, lb, weights); -end - -function [prox, val] = call_distPos_prox(x, gam, lb, weights) - mu = gam*weights; - prox = max(min(x+mu,lb),x); - if nargout > 1 - finw = ~isinf(weights); - val = sum(weights(finw).*max(lb(finw)-prox(finw),0)); - end -end diff --git a/library/elasticNet.m b/library/elasticNet.m deleted file mode 100755 index aa9bba2..0000000 --- a/library/elasticNet.m +++ /dev/null @@ -1,38 +0,0 @@ -%ELASTICNET Allocates the elastic net regularization function. -% -% ELASTICNET(mu, lam) builds the function -% -% g(x) = mu*||x||_1 + (lam/2)*||x||^2 - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = elasticNet(mu, lam) - if nargin < 2 - lam = 1; - if nargin < 1, mu = 1; end - end - obj.makeprox = @() @(x, gam) call_elasticNet_prox(x, gam, mu, lam); -end - -function [prox, g] = call_elasticNet_prox(x, gam, mu, lam) - uz = max(0, abs(x)-gam*mu)/(1+lam*gam); - prox = sign(x).*uz; - if nargout >= 2 - g = mu*sum(uz)+(0.5*lam)*(uz'*uz); - end -end diff --git a/library/hankelOp.m b/library/hankelOp.m deleted file mode 100755 index ca57a78..0000000 --- a/library/hankelOp.m +++ /dev/null @@ -1,14 +0,0 @@ -function obj = hankelOp(p, q) - if nargin < 2 - error('you must provide at least arguments p, q'); - end - H = []; - n = p+q-1; - for i = 1:q - H = [H; spdiags(ones(p,1), i-1, p, n)]; - end - obj.m = [p, q]; - obj.n = [(p+q-1), 1]; - obj.makeop = @() @(x) reshape(H*x, p, q); - obj.makeadj = @() @(y) H'*vec(y); -end diff --git a/library/hingeLoss.m b/library/hingeLoss.m deleted file mode 100755 index a7a47ab..0000000 --- a/library/hingeLoss.m +++ /dev/null @@ -1,47 +0,0 @@ -%HINGELOSS Allocates the hinge loss function. -% -% HINGELOSS(mu, b) builds the function -% -% g(x) = mu*sum(max(0, 1-b.*x)) - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = hingeLoss(mu, b) - if nargin < 1, mu = 1.0; end - if nargin < 2, b = 1.0; end - obj.isConvex = 1; - if isscalar(b) - obj.makeprox = @() @(x, gam) call_hingeLoss_prox_scal(x, gam, mu, b); - else - obj.makeprox = @() @(x, gam) call_hingeLoss_prox(x, gam, mu, b); - end -end - -function [prox, g] = call_hingeLoss_prox_scal(x, gam, mu, b) - bx = b*x; ind = bx < 1; - prox(ind,1) = b*min(bx(ind)+gam*mu,1); - prox(~ind,1) = x(~ind); - g = mu*sum(max(0,1-b*prox)); -end - -function [prox, g] = call_hingeLoss_prox(x, gam, mu, b) - bx = b.*x; ind = bx < 1; - prox(ind,1) = b(ind).*min(bx(ind)+gam*mu,1); - prox(~ind,1) = x(~ind); - g = mu*sum(max(0,1-b.*prox)); -end diff --git a/library/huberLoss.m b/library/huberLoss.m deleted file mode 100755 index 9ac4f0c..0000000 --- a/library/huberLoss.m +++ /dev/null @@ -1,49 +0,0 @@ -%HUBERLOSS Allocates the Huber loss function. -% -% HUBERLOSS(del) builds the function -% -% f(x) = sum_i l(x_i) -% -% where -% -% l(x_i) = 0.5/del*x_i^2 if |x_i| <= del -% |x_i|-0.5*del otherwise - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = huberLoss(del) - % - % Only f available for this function - % - obj.makef = @() @(x) call_huberLoss_f(x, del); - obj.L = 1/del; -end - -function [val, grad] = call_huberLoss_f(x, del) - absx = abs(x); - small = absx <= del; - large = ~small; - sqx = (0.5/del)*(x(small).^2); - linx = absx(large)-0.5*del; - val = sum(sqx)+sum(linx); - if nargout >= 2 - grad = zeros(length(x),1); - grad(small) = x(small)/del; - grad(large) = sign(x(large)); - end -end diff --git a/library/indBall_l0.m b/library/indBall_l0.m deleted file mode 100644 index 2272b9f..0000000 --- a/library/indBall_l0.m +++ /dev/null @@ -1,19 +0,0 @@ -%INDBALL_L0 Indicator function of the (nonconvex) L0 ball with given radius. -% -% INDBALL_L0(N) builds the function -% -% g(x) = 0 if nnz(x) <= N -% = +inf otherwise -% -% Argument N is required. - -function obj = indBall_l0(N) - obj.makeprox = @() @(x, gam) call_indBall_l0_prox(x, N); -end - -function [prox, val] = call_indBall_l0_prox(x, N) - prox = x; - [~, I] = sort(abs(prox), 'descend'); - prox(I(N+1:end)) = 0; - val = 0; -end diff --git a/library/indBall_l2.m b/library/indBall_l2.m deleted file mode 100755 index cd676cf..0000000 --- a/library/indBall_l2.m +++ /dev/null @@ -1,46 +0,0 @@ -%INDBALL_L2 Indicator function of the L2 ball with given center and radius. -% -% INDBALL_L2(rho, c) builds the function -% -% g(x) = 0 if ||x-c|| <= rho -% = +inf otherwise -% -% If c is not provided, c = 0. If also rho is not provided, rho = 1. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. 
-% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = indBall_l2(rho, c) - if nargin < 1 || isempty(rho) - rho = 1; - end - if nargin < 2 || isempty(c) - c = 0; - end - obj.makeprox = @() @(x, gam) call_indBall_l2_prox(x, rho, c); -end - -function [prox, val] = call_indBall_l2_prox(x, rho, c) - xc = x - c; - nxc = norm(xc); - if nxc <= rho - prox = x; - else - prox = c + (rho/nxc)*xc; - end - val = 0; -end diff --git a/library/indBalls_l0.m b/library/indBalls_l0.m deleted file mode 100644 index 5320185..0000000 --- a/library/indBalls_l0.m +++ /dev/null @@ -1,25 +0,0 @@ -%INDBALLS_L0 Indicator function of the product of (nonconvex) L0 balls. -% -% INDBALLS_L0(m, N) builds the product of L0 balls of dimension m with radius N. -% -% INDBALLS_L0(m, N, T) same as the above, but the L0 balls are -% intersected with [-T, T] boxes. - -function obj = indBalls_l0(m, N, T) - if nargin < 3 - T = inf; - end - obj.makeprox = @() @(x, gam) call_indBalls_l0_prox(x, m, N, T); -end - -function [prox, val] = call_indBalls_l0_prox(x, m, N, T) - n = length(x); - k = n/m; % we are assuming n is a multiple of m - prox = x; - for i = 1:k - [~, I] = sort(abs(prox((i-1)*m+1:i*m)), 'descend'); - prox((i-1)*m + I(1:N)) = max(-T, min(T, prox((i-1)*m + I(1:N)))); - prox((i-1)*m + I(N+1:end)) = 0; - end - val = 0; -end diff --git a/library/indBin.m b/library/indBin.m deleted file mode 100755 index 52f9cb4..0000000 --- a/library/indBin.m +++ /dev/null @@ -1,38 +0,0 @@ -%INDBIN Allocates the indicator function of a binary variable. -% -% INDBIN(v0, v1) builds indicator function of the set {v0, v1}^n. -% By default, v0 = 0 and v1 = 1. 
- -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = indBin(v0, v1) - if nargin < 1, v0 = 0; end - if nargin < 2, v1 = 1; end - obj.makeprox = @() @(x, gam) call_indBin_prox(x, v0, v1); -end - -% TODO: implement faster solution for particular cases -% like {-1, 1} (using sign) or {0, 1} (using >= 0.5) - -function [prox, g] = call_indBin_prox(x, v0, v1) - mid = (v1+v0)/2; - ind = (x >= mid); - prox(ind, 1) = v1; - prox(~ind, 1) = v0; - g = 0; -end diff --git a/library/indBox.m b/library/indBox.m deleted file mode 100755 index afa4818..0000000 --- a/library/indBox.m +++ /dev/null @@ -1,37 +0,0 @@ -%INDBOX Indicator function of a box. -% -% INDBOX(l, u) builds the function -% -% g(x) = 0 if l_i <= x_i <= u_i for all i -% = +inf otherwise -% -% Arguments l and u can be either vectors of the same size of x -% or scalars. If any l_i (u_i) has value -inf (+inf) then the -% corresponding segment [l_i, u_i] is lower (upper) unbounded. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. 
-% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = indBox(lower, upper) - obj.isConvex = 1; - obj.makeprox = @() @(x, gam) call_indBox_prox(x, lower, upper); -end - -function [prox, val] = call_indBox_prox(x, lower, upper) - prox = min(upper, max(lower, x)); - val = 0; -end diff --git a/library/indEpiSquaredNorm.m b/library/indEpiSquaredNorm.m deleted file mode 100644 index 447fd85..0000000 --- a/library/indEpiSquaredNorm.m +++ /dev/null @@ -1,65 +0,0 @@ -% INDEPISQUAREDNORM Indicator of the epigraph of the squared norm -% -% INDEPISQUAREDNORM builds the indicator of the epigraph of the squared -% norm, that is, the set -% -% C = {(x,t) : ||x||^2 <= t} -% - - -% Copyright (C) 2015-2017, KUL-FobBES (https://github.com/kul-forbes) -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = indEpiSquaredNorm() -obj.isConvex = 1; -obj.makeprox = @() @(x, gam) epipr_sqnorm(x(1:end-1), x(end)); - -function [prox, val] = epipr_sqnorm(x,z) -if (x'*x <= z) - prox =[x; z]; val = 0; - return; -end -theta = 1 - 2 * z; -r = cubic_roots(theta, x); -for i=1:length(r), % Pick the right root - x_ = x/(1 + 2*(r(i) - z)); - if abs(norm(x_)^2-r(i)) < 1e-6, - z_ = r(i); - break; - end -end -prox=[x_; z_]; -val = 0; - -function [r, status] = cubic_roots(theta, x) -b=4*theta; c=theta^2; d=-x'*x; -D = 72*b*c*d - 4*b^3*d +b^2*c^2 - 16*c^3 - 432*d^2; -D0 = b^2 - 12*c; -status.D = D; status.D0 = D0; -if abs(D)<1e-14, - if abs(D0)<1e-14, % one triple root - r = -b/12; - status.msg = 'one triple root'; - else % a double root and a single one - r = zeros(2,1); - r(1) = (16*b*c - 144*d - b^3)/(4*D0); % single - r(2) = (36*d - b*c)/(2*D0); % double (cannot be) - status.msg = 'double plus single'; - end - return; -end -r = roots([4 b c d]); % eigenvalues of matrix \ No newline at end of file diff --git a/library/indNeg.m b/library/indNeg.m deleted file mode 100755 index 9d2021a..0000000 --- a/library/indNeg.m +++ /dev/null @@ -1,38 +0,0 @@ -%INDNEG Indicator function of the negative orthant. -% -% INDNEG(u) builds the function -% -% g(x) = 0 if x_i <= u_i for all i -% = +inf otherwise -% -% Argument u is either a scalar or a vector of the same size of x. If -% argument u is not given, then u = 0. If any u_i is +inf then the -% corresponding halfline [-inf, u_i] is unbounded. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. 
-% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = indNeg(ub) - if nargin < 1 || isempty(ub), ub = 0; end - obj.isConvex = 1; - obj.makeprox = @() @(x, gam) call_indNeg_prox(x, ub); -end - -function [prox, val] = call_indNeg_prox(x, ub) - prox = min(ub, x); - val = 0; -end diff --git a/library/indPos.m b/library/indPos.m deleted file mode 100755 index 73ff260..0000000 --- a/library/indPos.m +++ /dev/null @@ -1,38 +0,0 @@ -%INDPOS Indicator function of the positive orthant. -% -% INDPOS(l) builds the function -% -% g(x) = 0 if x_i >= l_i for all i -% = +inf otherwise -% -% Argument l is either a scalar or a vector of the same size of x. If -% argument l is not given, then l = 0. If any l_i is +inf then the -% corresponding halfline [l_i, +inf] is unbounded. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = indPos(lb) - if nargin < 1 || isempty(lb), lb = 0; end - obj.isConvex = 1; - obj.makeprox = @() @(x, gam) call_indPos_prox(x, lb); -end - -function [prox, val] = call_indPos_prox(x, lb) - prox = max(lb, x); - val = 0; -end diff --git a/library/indRankBall.m b/library/indRankBall.m deleted file mode 100755 index 6a6b19b..0000000 --- a/library/indRankBall.m +++ /dev/null @@ -1,63 +0,0 @@ -%INDRANKBALL Allocates the nuclear norm function -% -% INDRANKBALL(m, n, r) equivalent to INDRANKBALL(m, n, r, 'svds') -% -% INDRANKBALL(m, n, r, method) builds the function -% -% g(x) = 0 if rank(x) <= r, +infinity otherwise -% -% where x is a vector of length m*n, containing the stacked columns of a -% m-by-n matrix. -% -% Argument 'method' selects how the SVD is computed: -% 'svds': use MATLAB's svds -% 'lansvd': use PROPACK'S lansvd - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = indRankBall(m, n, r, method, opt) - if nargin < 3 - error('you must provide the number of rows and columns, m and n, and rank r as arguments'); - end - if nargin < 4, method = 'svds'; end - if nargin < 5, opt = struct(); end - switch method - case 'svds' - obj.makeprox = @() @(x, gam) call_indRankBall_proj_svds(x, m, n, r, opt); - case 'lansvd' - obj.makeprox = @() @(x, gam) call_indRankBall_proj_lansvd(x, m, n, r, opt); - otherwise - error('unknown method for computing SVDs'); - end -end - -function [prox, val] = call_indRankBall_proj_svds(x, m, n, r, opt) - [U, S, V] = svds(reshape(x, m, n), r, 'largest', opt); - prox = reshape(U*(S*V'), m*n, 1); - if nargout >= 2 - val = 0; - end -end - -function [prox, val] = call_indRankBall_proj_lansvd(x, m, n, r, opt) - [U, S, V] = lansvd(reshape(x, m, n), r, 'L', opt); - prox = reshape(U*(S*V'), m*n, 1); - if nargout >= 2 - val = 0; - end -end diff --git a/library/indSOC.m b/library/indSOC.m deleted file mode 100644 index cd2ec64..0000000 --- a/library/indSOC.m +++ /dev/null @@ -1,60 +0,0 @@ -%INDSOC Indicator function of the positive orthant. -% -% INDSOC(b) builds the indicator of the translated second-order cone, i.e. -% -% g(x) = 0 if norm(z(2:end)) <= z(1), where z = x-b -% = +inf otherwise -% -% If argument b is not given or empty, then b = 0. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. 
-% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = indSOC(b) - if nargin < 1 || isempty(b), b = 0; end - obj.isConvex = 1; - obj.makeprox = @() @(x, gam) call_indSOC_prox(x, b); -end - -function [prox, val] = call_indSOC_prox(x, b) - z = x-b; - - if isempty(z) - z=[]; - return; - elseif length(z)==1 - z = max(z,0); - return; - end - - v1 = z(1); - v2 = z(2:end); - normv2 = norm(v2); - - if v1 <= -normv2 - z = zeros(length(z), 1); - elseif v1 >= normv2 - z = z; - else - a = (v1+normv2)/2; - z(1) = a; - z(2:end) = a*(z(2:end)/normv2); - end - - prox = z+b; - val = 0; -end diff --git a/library/indZero.m b/library/indZero.m deleted file mode 100755 index c25a8bf..0000000 --- a/library/indZero.m +++ /dev/null @@ -1,34 +0,0 @@ -%INDZERO Indicator function of the set {0} -% -% INDZERO() builds the function -% -% g(x) = 0 if x = 0 -% = +inf otherwise - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = indZero() - obj.isConvex = 1; - obj.makeprox = @() @(x, gam) call_indZero_proj(x); -end - -function [prox, val] = call_indZero_proj(x) - [n, m] = size(x); - prox = zeros(n, m); - val = 0; -end diff --git a/library/l0Norm.m b/library/l0Norm.m deleted file mode 100755 index f732047..0000000 --- a/library/l0Norm.m +++ /dev/null @@ -1,37 +0,0 @@ -%L0NORM Allocates the L0 norm function. -% -% L0NORM(mu) builds the function -% -% g(x) = mu*||x||_0 = mu*nnz(x) - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = l0Norm(mu) - if nargin < 1 - mu = 1; - end - obj.makeprox = @() @(x, gam) call_l0Norm_prox(x, gam, mu); -end - -function [prox, g] = call_l0Norm_prox(x, gam, mu) - over = abs(x) > sqrt(2*gam*mu); - prox = x.*over; - if nargout >= 2 - g = mu*nnz(prox); - end -end diff --git a/library/l1Norm.m b/library/l1Norm.m deleted file mode 100755 index 47d4381..0000000 --- a/library/l1Norm.m +++ /dev/null @@ -1,41 +0,0 @@ -%L1NORM Allocates the L1 norm function. -% -% L1NORM(mu) builds the function -% -% g(x) = mu*||x||_1 - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. 
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = l1Norm(mu) - if nargin < 1 - mu = 1; - end - obj.isConvex = 1; - obj.isQuadratic = 0; - obj.isConjQuadratic = 0; - obj.makeprox = @() @(x, gam) call_l1Norm_prox(x, gam, mu); -end - -function [prox, g] = call_l1Norm_prox(x, gam, mu) -% prox(x) and g(prox(x)) for function g(x) = mu*||x||_1 - uz = max(0.0, abs(x)-gam*mu); - prox = sign(x).*uz; - if nargout >= 2 - g = mu*sum(uz); - end -end diff --git a/library/l2Norm.m b/library/l2Norm.m deleted file mode 100755 index c4810bd..0000000 --- a/library/l2Norm.m +++ /dev/null @@ -1,25 +0,0 @@ -%L2NORM Allocates the L2 norm function. -% -% L2NORM(mu) builds the function -% -% g(x) = mu*||x||_2 - -function obj = l2Norm(mu) - if nargin < 1 - mu = 1; - end - obj.makeprox = @() @(x, gam) call_l2Norm_prox(x, gam, mu); -end - -function [prox, val] = call_l2Norm_prox(x, gam, mu) - normx = sqrt(x'*x); - mugam = mu*gam; - if normx <= mugam - prox = zeros(length(x),1); - val = 0; - else - scal = (1-mugam/normx); - prox = (1-mugam/normx)*x; - val = mu*scal*normx; - end -end diff --git a/library/l2NormSum.m b/library/l2NormSum.m deleted file mode 100755 index 2313be8..0000000 --- a/library/l2NormSum.m +++ /dev/null @@ -1,51 +0,0 @@ -%L2NORMSUM Allocates the sum-of-L2-norm function. -% -% L2NORMSUM(m, mu) builds the function -% -% g(x) = mu*sum(||x_i||_2) -% -% where x_i are blocks of size m of vector x. 
If mu is not provided it is -% assumed mu = 1. If also m is not provided, then m = 1 and the function -% is the L1 norm. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = l2NormSum(m, mu) - if nargin < 2 - mu = 1; - if nargin < 1 - m = 1; - end - end - obj.makeprox = @() @(x, gam) call_l2NormSum_prox(x, gam, m, mu); -end - -function [z, v] = call_l2NormSum_prox(x, gam, m, mu) - n = length(x); - nb = n/m; - x_resh = reshape(x, m, nb); - modx = sqrt(sum(x_resh.*x_resh, 1)); - newmodx = max(0, modx-gam*mu); - scal_block = newmodx./max(gam*mu, modx); - scal_resh = repmat(scal_block, m, 1); - scal = reshape(scal_resh, n, 1); - z = scal.*x; - if nargout >= 2 - v = mu*sum(newmodx); - end -end diff --git a/library/linOp.m b/library/linOp.m deleted file mode 100755 index 55a136e..0000000 --- a/library/linOp.m +++ /dev/null @@ -1,39 +0,0 @@ -%LINOP Allocates a linear operator given function handles computing the -%mapping and adjoint mapping. -% -% LINOP(op, adj, m, n) builds the linear operator defined by MATLAB -% function handles op (computing the operator) and adj (computing the -% adjoint operator). Parameters m and n are respectively the dimensions -% of the target space and domain of op. -% -% All parameters are compulsory. 
- -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = linOp(op, adj, m, n) - if nargin < 4 - error('you should provide 4 arguments: op, adj, m, n'); - end - if ~isa(op, 'function_handle') || ~isa(adj, 'function_handle') - error('first two arguments should be function handles'); - end - obj.m = m; - obj.n = n; - obj.makeop = @() op; - obj.makeadj = @() adj; -end diff --git a/library/logLoss.m b/library/logLoss.m deleted file mode 100755 index 998b5c6..0000000 --- a/library/logLoss.m +++ /dev/null @@ -1,48 +0,0 @@ -%LOGLOSS Allocates the log-logistic loss function. -% -% LOGLOSS(mu) builds the log-logistic loss function -% -% f(x) = mu*(sum_i log(1+exp(-x_i))) -% -% If not provided, mu = 1. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. 
-% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = logLoss(mu) - if nargin < 1, mu = 1; end - obj.makef = @() @(x) call_logLoss_f(x, mu); - obj.L = mu; % Lipschitz constant of the gradient of f - obj.hasHessian = 1; - obj.isConvex = 1; - obj.isQuadratic = 0; -end - -function [val, grad, hess] = call_logLoss_f(x, mu) - % value and gradient of f(x) = mu*sum(log(1+exp(-x))) - emx = exp(-x); - invpx = (1+emx); - val = sum(log(invpx))*mu; - if nargout >= 2 - px = 1./invpx; - grad = (px-1)*mu; - if nargout >= 3 - h = px.*(1-px); - hess = mu*diag(sparse(h)); - end - end -end diff --git a/library/lpNorm.m b/library/lpNorm.m deleted file mode 100644 index a8cd5ab..0000000 --- a/library/lpNorm.m +++ /dev/null @@ -1,74 +0,0 @@ -%LPNORM Allocates the L1 norm function. -% -% LPNORM(type, mu) builds the function -% -% g(x) = mu*sum(|x_i|^p) -% -% where if type = 'onehalf' then p = 1/2, -% 'twothirds' then p = 2/3. -% If mu is not provided, then mu = 1.0. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = lpNorm(type, mu) - if nargin < 1 - error('must provide type = ''onehalf'' or ''twothirds'''); - end - if nargin < 2 - mu = 1; - end - obj.isConvex = 0; - obj.isQuadratic = 0; - obj.isConjQuadratic = 0; - switch type - case 'onehalf' - obj.makeprox = @() @(x, gam) call_l_1_2_prox(x, gam, mu); - case 'twothirds' - error('not yet implemented'); -% obj.makeprox = @() @(x, gam) call_l_2_3_prox(x, gam, mu); - end -end - -% The implementation of the proximal mappings is based on: -% Cao, Sun, Xu, "Fast image deconvolution using closed-form -% thresholding formulas of L_q (q=1/2,2/3) regularization" (2013) - -function [prox, g] = call_l_1_2_prox(x, gam, mu) - lam = 2*gam*mu; - p = nthroot(54, 3)/4*nthroot(lam, 3)^2; - absx = abs(x); - phi = acos((lam/8)*sqrt((absx/3).^(-3))); - ind0 = (absx <= p); - prox(ind0, 1) = 0; - prox(~ind0, 1) = (2/3)*(sign(x(~ind0)).*absx(~ind0).*(1+cos((2/3)*(pi-phi(~ind0))))); - g = mu*sum(abs(prox).^(0.5)); -end - -% The following doesn't seem to work; must check what's wrong - -function [prox, g] = call_l_2_3_prox(x, gam, mu) - lam = 2*gam*mu; - p = (2/3)*(3*lam^3)^(0.25); - absx = abs(x); - phi = sqrt(lam^(-3))*(27/16)*x.^2; - A = (2/sqrt(3))*lam^(0.25)*sqrt(cosh(phi/3)); - ind0 = (absx <= p); - prox(ind0, 1) = 0; - prox(~ind0, 1) = sign(x(~ind0)).*((A(~ind0) + sqrt(2*absx(~ind0)./A(~ind0) - A(~ind0).^2))/2).^3; - g = mu*sum(abs(prox).^(2/3)); -end diff --git a/library/lqrCost.m b/library/lqrCost.m deleted file mode 100755 index 8dff882..0000000 --- a/library/lqrCost.m +++ /dev/null @@ -1,141 +0,0 @@ -%LQRCOST Allocates the linear quadratic regulator (LQR) cost function -% -% LQRCOST(x0, Q, R, Q_f, A, B, N) builds the LQR cost with stage matrices -% Q (for states) and R (for inputs), final cost matrix Q_f, dynamics A -% and B, prediction horizon N and initial state x0, i.e. 
the function -% -% f(x, u) = 0.5*sum(x[k]'*Q*x[k] + u[k]'*R*u[k], k=0,...,N-1) [stage cost] -% -% + 0.5*(x[N]'*Q_N*x[N]) [final cost] -% -% if x[0] = x0 and x[k+1] = A x[k] + B u[k], k = 0,...,N-1, and -% -% f(x, u) = +inf -% -% otherwise. -% -% LQRCOST(..., xref) defines the same cost function as in the previous -% case, but with the quadratic penalties -% -% (x[k]-xref)'*Q*(x[k]-xref), k = 0,...,N -% -% in the stage and final terms of the cost. -% -% LQRCOST(x0, obj) updates and return the LQR function obj with the new -% initial state x0. -% -% LQRCOST(x0, obj, xref) updates and return the LQR function with new -% initial state x0 and reference state xref. -% -% Example: -% -% f = LQRCOST(x0, Q, R, Q_f, A, B, N); -% [compute the next state x1 of the system] -% f = LQRCOST(x1, f); - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = lqrCost(x0, varargin) - % - % Only f conjugate is available. 
- % - if length(varargin) > 2 - obj.Q = varargin{1}; - obj.R = varargin{2}; - obj.Q_f = varargin{3}; - obj.A = varargin{4}; - obj.B = varargin{5}; - obj.N = varargin{6}; - obj.QR = blkdiag(obj.Q, obj.R); - obj = RiccatiFactor(obj); - if length(varargin) >= 7 - xref = varargin{7}; - % a reference state different from zero results in a linear - % tilting of f, plus the addition of a constant: record these - % tilting & constant so to take into account for them when - % computing the conjugate - obj.tilt = [repmat([obj.Q*xref; zeros(size(obj.R, 1), 1)], obj.N, 1); obj.Q_f*xref]; - obj.diff = (obj.N+1)/2*norm(xref)^2; - else - obj.tilt = 0; - obj.diff = 0; - end - obj.makefconj = @() @(w) call_lqrCost_fconj(w, x0, obj); - else - obj = varargin{1}; - if length(varargin) == 2 - xref = varargin{2}; - obj.tilt = [repmat([obj.Q*xref; zeros(size(obj.R, 1), 1)], obj.N, 1); obj.Q_f*xref]; - obj.diff = (obj.N+1)/2*norm(xref)^2; - else - obj.tilt = 0; - obj.diff = 0; - end - obj.makefconj = @() @(w) call_lqrCost_fconj(w, x0, obj); - end - obj.isConvex = 1; - obj.isQuadratic = 0; - obj.isConjQuadratic = 1; -end - -function [fcw, xu] = call_lqrCost_fconj(w, x0, obj) - [n_x, n_u] = size(obj.B); - [~, xu] = RiccatiSolve(w+obj.tilt, x0, obj.A, obj.B, obj.LRs, obj.Ks, obj.Ms, obj.Ls, int32(n_x), int32(n_u), int32(obj.N)); - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % Less efficient -% fxu = 0; -% for i=0:obj.N-1 -% x_i = xu(i*n_xu+1:i*n_xu+n_x); -% u_i = xu(i*n_xu+n_x+1:(i+1)*n_xu); -% fxu = fxu + 0.5*(x_i'*(obj.Q*x_i) + u_i'*(obj.R*u_i)); -% end - % More efficient - XU_stage = reshape(xu(1:end-n_x), n_x + n_u, obj.N); - fxu = 0.5*sum(sum(XU_stage.*(obj.QR*XU_stage))); - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - x_N = xu(obj.N*(n_x+n_u)+1:end); - fxu = fxu + 0.5*(x_N'*(obj.Q_f*x_N)); - fcw = (w+obj.tilt)'*xu - fxu - obj.diff; -end - -function obj = RiccatiFactor(obj) - n = size(obj.Q,1); - m = size(obj.R,1); - Ps = zeros(n, n, obj.N+1); - 
Ps(:,:,obj.N+1) = obj.Q_f; - obj.LRs = zeros(m, m, obj.N); - obj.Ss = zeros(m, n, obj.N); - obj.Ks = zeros(m, n, obj.N); - obj.Ms = zeros(m, n, obj.N); - obj.Ls = zeros(n, n, obj.N); - for k = obj.N:-1:1 - Rbar = obj.R + obj.B'*(Ps(:,:,k+1)*obj.B); - Rbar = (Rbar+Rbar')/2; - LR = chol(Rbar, 'lower'); - obj.LRs(:,:,k) = LR; - obj.Ss(:,:,k) = obj.B'*(Ps(:,:,k+1)*obj.A); - obj.Ks(:,:,k) = -(LR'\(LR\obj.Ss(:,:,k))); - Ps(:,:,k) = obj.Q + obj.A'*(Ps(:,:,k+1)*obj.A) + obj.Ss(:,:,k)'*obj.Ks(:,:,k); - Ps(:,:,k) = (Ps(:,:,k) + Ps(:,:,k)')/2; - end - for k = 1:obj.N - LR = obj.LRs(:,:,k); - obj.Ms(:,:,k) = -(LR'\(LR\obj.B')); - obj.Ls(:,:,k) = (obj.A + obj.B*obj.Ks(:,:,k))'; - end -end diff --git a/library/matFac.m b/library/matFac.m deleted file mode 100755 index 03ef871..0000000 --- a/library/matFac.m +++ /dev/null @@ -1,43 +0,0 @@ -%MATFAC -% -% MATFAC(A, r) returns the function -% -% f(x) = 0.5*||A-UV||^2_F -% -% where x = [U(:); V(:)]. If A is n-times-m then U is n-times-r and V is -% r-times-m, therefore it must be length(x) = (n+m)*r. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = matFac(A, r) - if nargin < 2 - error('two arguments are required: A (matrix to factor) and r (rank of the factorization)'); - end - [n, m] = size(A); - obj.makef = @() @(x) call_matrixFactorization_fun(x, A, n, r, m); -end - -function [val, grad] = call_matrixFactorization_fun(x, A, n, r, m) - nr = n*r; - mr = m*r; - U = reshape(x(1:nr), n, r); - V = reshape(x(nr+1:nr+mr), r, m); - res = U*V - A; - val = 0.5*norm(res, 'fro')^2; - grad = [reshape((res*V'), nr, 1); reshape((U'*res), mr, 1)]; -end diff --git a/library/matOp.m b/library/matOp.m deleted file mode 100755 index 078f880..0000000 --- a/library/matOp.m +++ /dev/null @@ -1,37 +0,0 @@ -%MATOP Allocates a linear operator given a matrix -% -% MATOP(A) builds the linear operator associated with A. This is just for -% completeness, as ForBES actually accepts matrices as linear operators, -% and treats them accordingly. -% -% All parameters are compulsory. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = matOp(A) - if nargin < 1 - error('you should provide 1 arguments: A'); - end - if ~ismatrix(A) - error('first argument must be a matrix'); - end - obj.m = [size(A, 1), 1]; - obj.n = [size(A, 2), 1]; - obj.makeop = @() @(x) A*x; - obj.makeadj = @() @(y) A'*y; -end diff --git a/library/moreauEnvelope.m b/library/moreauEnvelope.m deleted file mode 100755 index 2b9090b..0000000 --- a/library/moreauEnvelope.m +++ /dev/null @@ -1,46 +0,0 @@ -%MOREAUENVELOPE Defines the Moreau envelope of a proximable function -% -% MOREAUENVELOPE(f, gam) where f is a function object, and gam > 0 is a -% real number, returns function F(x) defined as -% -% F(x) = f(prox(x, gam)) + 1/(2*gam)*||x - prox(x, gam)||^2 -% -% where prox is the proximal mapping associated with f. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = moreauEnvelope(obj1, gam) - if ~isfield(obj1, 'makeprox') - error('the function does not have the proximal mapping defined'); - end - if gam <= 0 - error('second argument must be a positive real number'); - end - proxf1 = obj1.makeprox(); - obj.L = 1/gam; - obj.makef = @() @(x) call_moreauEnvelope_f1(x, proxf1, gam); -end - -function [v, grad] = call_moreauEnvelope_f1(x, proxf1, gam) - [z, f1z] = proxf1(x, gam); - res = x - z; - v = f1z + 0.5/gam*norm(res, 2)^2; - if nargout >= 2 - grad = res/gam; - end -end diff --git a/library/nuclearNorm.m b/library/nuclearNorm.m deleted file mode 100755 index c521f8a..0000000 --- a/library/nuclearNorm.m +++ /dev/null @@ -1,180 +0,0 @@ -%NUCLEARNORM Allocates the nuclear norm function -% -% NUCLEARNORM(m, n) equivalent to NUCLEARNORM(m, n, 1.0, 'exact', 'svds') -% -% NUCLEARNORM(m, n, lam, mode, method) builds the function -% -% g(x) = lam*||x||_* -% -% where ||.||_* is the nuclear norm for m-by-n matrices. The function -% argument x is either an m-by-n matrix, or a vector of length m*n containing -% the stacked columns of an m-by-n matrix. (The prox will return a -% consistent array, i.e., with the same shape as the input argument x). -% -% If the third argument lam is not provided, lam = 1. -% -% Argument 'mode' selects how to compute the proximal operator -% associated with the function: -% 'exact': compute the full svd using MATLAB's svd (default) -% 'adaptive': compute the exact prox using a partial svd -% 'inexact': compute an inexact prox using a partial svd -% -% Argument 'method' selects how the SVD is computed in 'adaptive' -% and 'inexact' mode: -% 'svds': use MATLAB's svds (default) -% 'lansvd': use PROPACK'S lansvd - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. 
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = nuclearNorm(m, n, lam, mode, method) - global nsv; - global flagadd; - nsv = 10; - flagadd = 0; - if nargin < 2 - error('you must provide the number of rows and columns, m and n, as arguments'); - end - if nargin < 3, lam = 1; end - if nargin < 4, mode = 'exact'; end - if nargin < 5, method = 'svds'; end - switch mode - case 'exact' % exact prox - obj.makeprox = @() @(x, gam) call_nuclearNorm_prox(x, gam, m, n, lam); - case 'adaptive' % adaptive prox - switch method - case 'svds' - obj.makeprox = @() @(x, gam) call_nuclearNorm_prox_adaptive_svds(x, gam, m, n, lam); - case 'lansvd' - obj.makeprox = @() @(x, gam) call_nuclearNorm_prox_adaptive_lansvd(x, gam, m, n, lam); - otherwise - error('unknown method for computing SVDs'); - end - case 'inexact' % adaptive/inexact prox - switch method - case 'svds' - obj.makeprox = @() @(x, gam) call_nuclearNorm_prox_inexact_svds(x, gam, m, n, lam); - case 'lansvd' - obj.makeprox = @() @(x, gam) call_nuclearNorm_prox_inexact_lansvd(x, gam, m, n, lam); - otherwise - error('unknown method for computing SVDs'); - end - end -end - -function [prox, val] = call_nuclearNorm_prox(x, gam, m, n, lam) - [m_orig, n_orig] = size(x); - [U, S, V] = svd(reshape(x, m, n), 'econ'); - diagS1 = max(0, diag(S)-lam*gam); - S1 = diag(sparse(diagS1)); - prox = reshape(U*(S1*V'), m_orig, n_orig); - if nargout >= 2 - val = 
lam*sum(diagS1); - end -end - -function [prox, val] = call_nuclearNorm_prox_adaptive_svds(x, gam, m, n, lam) - global nsv; - global flagadd; - [m_orig, n_orig] = size(x); - maxrank = min(m, n); - flagok = 0; - while ~flagok - [U, S, V] = svds(reshape(x, m, n), nsv, 'L'); - diagS1 = max(0, diag(S)-lam*gam); - if nnz(diagS1) == length(diagS1) - if flagadd - nsv = min(maxrank, 10+nsv); - else - nsv = min(maxrank, 2*nsv); - end - else - nsv = nnz(diagS1)+1; - flagok = 1; - flagadd = 1; - end - end - S1 = diag(sparse(diagS1)); - prox = reshape(U*(S1*V'), m_orig, n_orig); - if nargout >= 2 - val = lam*sum(diagS1); - end -end - -function [prox, val] = call_nuclearNorm_prox_inexact_svds(x, gam, m, n, lam) - global nsv; - [m_orig, n_orig] = size(x); - maxrank = min(m, n); - [U, S, V] = svds(reshape(x, m, n), nsv, 'L'); - diagS1 = max(0, diag(S)-lam*gam); - if nnz(diagS1) == length(diagS1) - nsv = min(maxrank, nsv+5); - else - nsv = nnz(diagS1)+1; - end - S1 = diag(sparse(diagS1)); - prox = reshape(U*(S1*V'), m_orig, n_orig); - if nargout >= 2 - val = lam*sum(diagS1); - end -end - -function [prox, val] = call_nuclearNorm_prox_adaptive_lansvd(x, gam, m, n, lam) - global nsv; - global flagadd; - [m_orig, n_orig] = size(x); - maxrank = min(m, n); - flagok = 0; - while ~flagok - [U, S, V] = lansvd(reshape(x, m, n), nsv, 'L'); - diagS1 = max(0, diag(S)-lam*gam); - if nnz(diagS1) == length(diagS1) - if flagadd - nsv = min(maxrank, 10+nsv); - else - nsv = min(maxrank, 2*nsv); - end - else - nsv = nnz(diagS1)+1; - flagok = 1; - flagadd = 1; - end - end - S1 = diag(sparse(diagS1)); - prox = reshape(U*(S1*V'), m_orig, n_orig); - if nargout >= 2 - val = lam*sum(diagS1); - end -end - -function [prox, val] = call_nuclearNorm_prox_inexact_lansvd(x, gam, m, n, lam) - global nsv; - [m_orig, n_orig] = size(x); - maxrank = min(m, n); - [U, S, V] = lansvd(reshape(x, m, n), nsv, 'L'); - diagS1 = max(0, diag(S)-lam*gam); - if nnz(diagS1) == length(diagS1) - nsv = min(maxrank, nsv+5); - else - 
nsv = nnz(diagS1)+1; - end - S1 = diag(sparse(diagS1)); - prox = reshape(U*(S1*V'), m_orig, n_orig); - if nargout >= 2 - val = lam*sum(diagS1); - end -end diff --git a/library/powabs.m b/library/powabs.m deleted file mode 100755 index 8dc63cb..0000000 --- a/library/powabs.m +++ /dev/null @@ -1,62 +0,0 @@ -%POWABS POWer of ABSolute value function. -% -% POWABS(p, c) builds the function -% -% g(x) = c*|x|^p -% -% If c is not provided, it is assumed to be 1. If also p is not provided, -% it is assumed to be 1. -% -% See Combettes, Dung, Vu, Dualization of Signal Recovery Problems, Example 2.15 - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = powabs(p, c) - if nargin<2 || isempty(c) - c = 1; - if nargin<1 || isempty(p) - p = 1; - end - end - pows = [1;4/3;3/2;2;3;4]; - if all((p == pows) == 0) - error('Prox is not computable') - end - obj.makeprox = @() @(x, gam) call_powabs_prox(x, gam, p, c); -end - -function [prox, val] = call_powabs_prox(x, gam, p, c) - gam = c*gam; - switch p - case 1 - prox = sign(x)*max(abs(x)-gam,0); - case 4/3 - rho = sqrt(x^2+256*gam^3/729); - prox = x + (4*gam)/(3*2^(1/3))*(abs(rho-x)^(1/3)-abs(rho+x)^(1/3)); - case 3/2 - prox = x + 9*gam^2*sign(x)*(1-sqrt(1+16*abs(x)/(9*gam^2)))/8; - case 2 - prox = x/(1+2*gam); - case 3 - prox = sign(x)*(sqrt(1+12*gam*abs(x))-1)/(6*gam); - case 4 - rho = sqrt(x^2+1/(27*gam)); - prox = abs((rho + x)/(8*gam))^(1/3)-abs((rho - x)/(8*gam))^(1/3); - end - val = c*abs(prox)^p; -end diff --git a/library/quadLoss.m b/library/quadLoss.m deleted file mode 100755 index 42799f7..0000000 --- a/library/quadLoss.m +++ /dev/null @@ -1,73 +0,0 @@ -%QUADLOSS Allocates the squared norm function. -% -% QUADLOSS(w, p) builds the function -% -% f(x) = 0.5*sum_i w_i(x_i-p_i)^2 -% -% If w is a positive scalar then w_i = w (same for p). If omitted, w = 1 -% and p = 0. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = quadLoss(w, p) - if nargin < 1, w = 1; end - if nargin < 2, p = 0; end - if any(w < 0) - error('first argument should be nonnegative'); - end - obj.isConvex = 1; - obj.isQuadratic = 1; - obj.isConjQuadratic = 1; - obj.hasHessian = 1; - obj.L = max(w); - if isscalar(w) - obj.makef = @() @(x) call_squaredWeightedDistance(x, w, p); - if w > 0 - obj.makefconj = @() @(x) call_squaredWeightedDistance_conj(x, 1/w, p); - end - elseif ismatrix(w) - obj.makef = @() @(x) call_squaredWeightedDistance(x, w, p); - if all(w > 0) - obj.makefconj = @() @(x) call_squaredWeightedDistance_conj(x, 1./w, p); - end - end - obj.makeprox = @() @(x, gam) call_squaredWeightedDistance_prox(x, gam, w); -end - -function [v, g, H] = call_squaredWeightedDistance(x, w, p) - res = x-p; - g = w.*res; - v = 0.5*(res(:)'*g(:)); - if nargout >= 3 - H = @(x) w.*x; - end -end - -function [v, g, H] = call_squaredWeightedDistance_conj(y, w, p) - g = p + w.*y; - v = 0.5*(y(:)'*(g(:) + p(:))); - if nargout >= 3 - H = @(x) w.*x; - end -end - -function [prox, val] = call_squaredWeightedDistance_prox(x, gam, w) - wgam = w.*gam; - prox = x./(1+wgam); - val = ((w.*prox)'*prox)/2; -end diff --git a/library/quadLossOverAffine.m b/library/quadLossOverAffine.m deleted file mode 100755 index ca7fb05..0000000 --- a/library/quadLossOverAffine.m +++ /dev/null @@ -1,45 +0,0 @@ -%QUADLOSSOVERAFFINE Allocates the squared distance function over an affine subspace. -% -% QUADLOSSOVERAFFINE(A, b, w, p) returns the function -% -% f(x) = 0.5*sum_i w_i(x_i-p_i)^2 subject to A*x = b -% -% Requires LDLCHOL and LDLSOLVE from SuiteSparse by Tim Davis. -% See: http://faculty.cse.tamu.edu/davis/suitesparse.html - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. 
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = quadLossOverAffine(A, b, w, p) - if nargin < 4, p = 0; end - if nargin < 3, w = 1; end - if isscalar(w), w = repmat(w, size(A,2), 1); end - obj.isConjQuadratic = 1; - obj.makefconj = @() make_quadLossOverAffine_conj(w, p, A, b); -end - -function fun = make_quadLossOverAffine_conj(w, p, A, b) - LD = ldlchol(A*diag(sparse(1./sqrt(w))), 1e-12); - fun = @(y) call_quadLossOverAffine_conj(y, LD, w, p, A, b); -end - -function [val, grad] = call_quadLossOverAffine_conj(y, LD, w, p, A, b) - wyp = (y./w)+p; - grad = wyp-(A'*ldlsolve(LD, A*wyp-b))./w; - gradp = grad-p; - val = y'*grad-0.5*(gradp'*(w.*gradp)); -end diff --git a/library/quadratic.m b/library/quadratic.m deleted file mode 100755 index ce0ce28..0000000 --- a/library/quadratic.m +++ /dev/null @@ -1,95 +0,0 @@ -%QUADRATIC Allocates a quadratic function. -% -% QUADRATIC(Q, q) builds the function -% -% f(x) = 0.5*x'*Q*x+q'*x -% -% Both arguments are required. Matrix Q can be a scalar, in which case -% it is intended to be a diagonal matrix with uniform diagonal elements. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. 
-% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = quadratic(Q, q) - obj.isQuadratic = 1; - obj.isConjQuadratic = 1; - obj.hasHessian = 1; - if isa(Q, 'function_handle') - obj.makef = @() @(x) call_quadratic_fun_handle(Q, q, x); - else - obj.makef = @() @(x) call_quadratic_fun_matrix(Q, q, x); - obj.makeprox = @() make_quadratic_prox(Q, q); - end - obj.makefconj = @() make_quadratic_conj(Q, q); -end - -function [v, g, Q] = call_quadratic_fun_matrix(Q, q, x) - g = Q*x+q; - v = 0.5*(g+q)'*x; -end - -function [v, g, Q] = call_quadratic_fun_handle(Q, q, x) - g = Q(x)+q; - v = 0.5*(g+q)'*x; -end - -function fun = make_quadratic_conj(Q, q) - if issparse(Q) - [L,flag,p] = chol(Q,'lower','vector'); - if flag~=0 - error('Q is not positive definite') - end - fun = @(y) call_quadratic_sparse_conj(L, p, q, y); - else - [L,flag] = chol(Q,'lower'); - if flag~=0 - error('Q is not positive definite') - end - fun = @(y) call_quadratic_dense_conj(L, q, y); - end -end - -function [v, g] = call_quadratic_sparse_conj(L, p, q, y) - rhs = y-q; - g(p,1) = L'\(L\rhs(p)); - v = 0.5*(y-q)'*g; -end - -function [v, g] = call_quadratic_dense_conj(L, q, y) - g = L'\(L\(y-q)); - v = 0.5*(y-q)'*g; -end - -function prox = make_quadratic_prox(Q, q) - clear prox_quadratic; - prox = @(x, gam) prox_quadratic(x, gam, Q, q); -end - -function [p, v] = prox_quadratic(x, gam, Q, q) - % using persistent variables - % bad practice, 
dirty trick, I'm ashamed of myself - persistent stored_gam stored_L; - if isempty(stored_gam) || isempty(stored_L) || gam ~= stored_gam - % factor matrix when gam changes (or at the first call) - stored_gam = gam; - n = length(x); - I = speye(n); - stored_L = chol(I + gam*Q); % do differently for sparse? - end - p = stored_L\(stored_L'\(x - gam*q)); - v = 0.5*(p'*(Q*p)) + q'*p; % can we save something here? -end \ No newline at end of file diff --git a/library/quadraticOverAffine.m b/library/quadraticOverAffine.m deleted file mode 100755 index 78fdeda..0000000 --- a/library/quadraticOverAffine.m +++ /dev/null @@ -1,45 +0,0 @@ -%QUADRATICOVERAFFINE Allocates a quadratic function over an affine subspace. -% -% QUADRATICOVERAFFINE(A, b, Q, q) builds the function -% -% f(x) = 0.5*(x'*Q*x)+q'*x subject to A*x = b - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = quadraticOverAffine(A, b, Q, q) - if nargin < 2, error('you should provide at least 2 arguments'); end - if nargin < 3, Q = 1; end - if nargin < 4, q = zeros(size(A, 2)); end - if isscalar(Q), Q = Q*speye(size(A, 2)); end - obj.isQuadratic = 0; - obj.isConjQuadratic = 1; - obj.makefconj = @() make_quadraticOverAffine_conj(Q, q, A, b); -end - - -function fc = make_quadraticOverAffine_conj(Q, q, A, b) - m = size(A,1); - [L,D,P] = ldl([Q A';A sparse(m,m)]); - fc = @(y) eval_quadraticOverAffine_conj(y, L, D, P, Q, q, b); -end - -function [val, grad] = eval_quadraticOverAffine_conj(y, L, D, P, Q, q, b) - grad = P*(L'\(D\(L\(P'*[y-q; b])))); - grad = grad(1:length(q),1); - val = -(0.5*grad'*(Q*grad)+(q-y)'*grad); -end diff --git a/library/separableSum.m b/library/separableSum.m deleted file mode 100644 index 56273f9..0000000 --- a/library/separableSum.m +++ /dev/null @@ -1,123 +0,0 @@ -%SEPARABLESUM Combines functions into their separable sum -% -% SEPARABLESUM(fs, sizes, idx) where fs is a cell array of function -% objects, sizes is a cell array containing size vectors, idx is an -% integer vector of the same length of sizes. -% If length(idx) = length(sizes) = k, then SEPARABLESUM returns the -% function object correspondent to the sum -% -% f(x) = sum_i=1...k fs{idx(i)}(x_i) -% -% i.e., the sum of k functions, the ith being idx(i) and applied to a -% block of prod(sizes{i}) variables. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . - -function obj = separableSum(objs, dims, idx) - - l = length(objs); - if nargin < 3, idx = 1:l; end - if length(idx) == 1, obj = objs{idx(1)}; return; end - % determine Lipschitz constant (if possible) - maxL = -1; - noL = 0; - obj.isConvex = 1; - obj.isQuadratic = 1; - obj.isConjQuadratic = 1; - obj.hasHessian = 1; - for i = 1:l - if ~isfield(objs{i}, 'L'), noL = 1; - elseif objs{i}.L > maxL, maxL = objs{i}.L; end - if ~isfield(objs{i}, 'isConvex') || objs{i}.isConvex == 0 - obj.isConvex = 0; - end - if ~isfield(objs{i}, 'isQuadratic') || objs{i}.isQuadratic == 0 - obj.isQuadratic = 0; - end - if ~isfield(objs{i}, 'isConjQuadratic') || objs{i}.isConjQuadratic == 0 - obj.isConjQuadratic = 0; - end - if ~isfield(objs{i}, 'hasHessian') || objs{i}.hasHessian == 0 - obj.hasHessian = 0; - end - end - if noL == 0 - obj.L = maxL; - end - for i = 1:length(dims) - if numel(dims{i}) == 1, dims{i} = [dims{i}, 1]; end - end - nsum(1) = prod(dims{1}); - for i = 2:length(idx) - nsum(i) = nsum(i-1) + prod(dims{i}); - end - obj.makeprox = @() make_separableSum_prox(objs, idx, nsum, dims); - obj.makef = @() make_separableSum_callf(objs, idx, nsum, dims); - -end - -function fun = make_separableSum_prox(objs, idx, nsum, dims) - proxes = {}; - for i=1:length(objs) - proxes{end+1} = objs{i}.makeprox(); - end - fun = @(x, gam) call_separableSum_prox(x, gam, proxes, idx, nsum, dims); -end - -function [prox, val] = call_separableSum_prox(x, gam, proxes, idx, nsum, dims) - prox = zeros(nsum(end), 1); - val = 0; - baseidx = 1; - for i=1:length(idx) - xcurr = x(baseidx:nsum(i)); - [z, val1] = proxes{idx(i)}(reshape(xcurr, dims{i}(1), dims{i}(2)), gam); - prox(baseidx:nsum(i)) = z(:); - val = val+val1; - baseidx = nsum(i)+1; - end -end - -function fun = make_separableSum_callf(objs, idx, nsum, dims) - callfs = {}; - 
for i=1:length(objs) - callfs{end+1} = objs{i}.makef(); - end - fun = @(x) call_separableSum_f(x, callfs, idx, nsum, dims); -end - -function [val, grad] = call_separableSum_f(x, callfs, idx, nsum, dims) - val = 0; - grad = []; - baseidx = 1; - if nargout >= 2 - for i=1:length(idx) - xcurr = x(baseidx:nsum(i)); - [val1, grad1] = callfs{idx(i)}(reshape(xcurr, dims{i}(1), dims{i}(2))); - val = val+val1; - grad = [grad; grad1(:)]; - baseidx = nsum(i)+1; - end - else - for i=1:length(idx) - xcurr = x(baseidx:nsum(i)); - [val1] = callfs{idx(i)}(reshape(xcurr, dims{i}(1), dims{i}(2))); - val = val+val1; - baseidx = nsum(i)+1; - end - end -end diff --git a/library/smoothFunction.m b/library/smoothFunction.m deleted file mode 100644 index 2d1b5f7..0000000 --- a/library/smoothFunction.m +++ /dev/null @@ -1,20 +0,0 @@ -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = smoothFunction(f) - obj.makef = @() f; -end diff --git a/library/stackOp.m b/library/stackOp.m deleted file mode 100755 index e548ab0..0000000 --- a/library/stackOp.m +++ /dev/null @@ -1,46 +0,0 @@ -function obj = stackOp(objs, idx) - l = length(objs); - if nargin < 2, idx = 1:l; end - - msum = zeros(1, length(idx)); - dims = [zeros(length(idx), 1), ones(length(idx), 1)]; - msum(1) = prod(objs{1}.m); - dims(1,1) = objs{1}.m(1); - if size(objs{1}.m, 2) > 1, dims(1,2) = objs{1}.m(2); end - n = objs{1}.n; - for i = 2:length(idx) - if any(objs{idx(i)}.n ~= n), error('all operators must have image in the same space'); end - dims(i,1) = objs{i}.m(1); - if size(objs{i}.m, 2) > 1, dims(i,2) = objs{i}.m(2); end - msum(i) = msum(i-1) + prod(dims(i,:)); - end - - callops = {}; - calladjs = {}; - for i = 1:l - callops{i} = objs{i}.makeop(); - calladjs{i} = objs{i}.makeadj(); - end - - obj.m = [msum(end), 1]; - obj.n = n; - obj.makeop = @() @(x) call_stackOps(x, callops, idx); - obj.makeadj = @() @(y) call_sumOps(y, calladjs, idx, msum, dims); -end - -function y = call_sumOps(x, ops, idx, nsum, dims) - y = 0; - baseidx = 1; - for i = 1:length(idx) - y = y + ops{idx(i)}(reshape(x(baseidx:nsum(i)), dims(i,1), dims(i,2))); - baseidx = nsum(i)+1; - end -end - -function x = call_stackOps(y, ops, idx) - x = []; - for i = 1:length(idx) - z = ops{idx(i)}(y); - x = [x; z(:)]; - end -end diff --git a/library/sumOp.m b/library/sumOp.m deleted file mode 100755 index 57df5d8..0000000 --- a/library/sumOp.m +++ /dev/null @@ -1,46 +0,0 @@ -function obj = sumOp(objs, idx) - l = length(objs); - if nargin < 2, idx = 1:l; end - - nsum = zeros(1, length(idx)); - dims = [zeros(length(idx), 1), ones(length(idx), 1)]; - nsum(1) = prod(objs{1}.n); - dims(1,1) = objs{1}.n(1); - if size(objs{1}.n, 2) > 1, dims(1,2) = objs{1}.n(2); end - m = objs{1}.m; - for i = 2:length(idx) - if any(objs{idx(i)}.m ~= m), error('all operators must have image in the same space'); end - dims(i,1) = 
objs{i}.n(1); - if size(objs{i}.n, 2) > 1, dims(i,2) = objs{i}.n(2); end - nsum(i) = nsum(i-1) + prod(dims(i,:)); - end - - callops = {}; - calladjs = {}; - for i = 1:l - callops{i} = objs{i}.makeop(); - calladjs{i} = objs{i}.makeadj(); - end - - obj.m = m; - obj.n = [nsum(end), 1]; - obj.makeop = @() @(x) call_sumOps(x, callops, idx, nsum, dims); - obj.makeadj = @() @(y) call_stackOps(y, calladjs, idx); -end - -function y = call_sumOps(x, ops, idx, nsum, dims) - y = 0; - baseidx = 1; - for i = 1:length(idx) - y = y + ops{idx(i)}(reshape(x(baseidx:nsum(i)), dims(i,1), dims(i,2))); - baseidx = nsum(i)+1; - end -end - -function x = call_stackOps(y, ops, idx) - x = []; - for i = 1:length(idx) - z = ops{idx(i)}(y); - x = [x; z(:)]; - end -end diff --git a/library/zeroFunction.m b/library/zeroFunction.m deleted file mode 100755 index ca0926d..0000000 --- a/library/zeroFunction.m +++ /dev/null @@ -1,41 +0,0 @@ -%ZEROFUNCTION Allocates the function identically equal to zero. -% -% ZEROFUNCTION(n) builds the function -% -% f(x) = 0 -% -% where x is a vector of length n. - -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. If not, see . 
- -function obj = zeroFunction() - obj.isConvex = 1; - obj.isQuadratic = 1; - obj.makef = @() @(x) call_zeroFunction_fun(x); - obj.makeprox = @() @(x, gam) call_zeroFunction_prox(x); -end - -function [val, grad] = call_zeroFunction_fun(x) - val = 0; - grad = zeros(length(x), 1); -end - -function [prox, val] = call_zeroFunction_prox(x) - prox = x; - val = 0; -end diff --git a/private/Direction_bfgs.m b/private/Direction_bfgs.m deleted file mode 100644 index 9976c90..0000000 --- a/private/Direction_bfgs.m +++ /dev/null @@ -1,31 +0,0 @@ -% BFGS - -function [dir, tau0, cache] = Direction_bfgs(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -v; - cache.H = eye(prod(prob.n)); -else - H = cache.H; - YSk = yk'*sk; - if YSk > 0 - Bs = H'*(H*sk); - sBs = sk'*Bs; - H = cholupdate(cholupdate(H,yk/sqrt(YSk)),Bs/sqrt(sBs),'-'); - else - cache.cntSkip = cache.cntSkip+1; - end - dir = -linsolve(H,linsolve(H,v,opt.optsL),opt.optsU); - cache.H = H; -end - -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_bfgs_naive.m b/private/Direction_bfgs_naive.m deleted file mode 100644 index 8f82444..0000000 --- a/private/Direction_bfgs_naive.m +++ /dev/null @@ -1,32 +0,0 @@ -% BFGS, naive implementation intended for debugging purposes -% (direct update & backslash) - -function [dir, tau0, cache] = Direction_bfgs_naive(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -v; - cache.B = eye(prod(prob.n)); -else - B = cache.B; - YSk = yk'*sk; - if YSk > 0 - Bs = B*sk; - sBs = sk'*Bs; - B = B + (yk*yk')/YSk - (Bs*Bs')/sBs; - else - cache.cntSkip = cache.cntSkip+1; - end - dir = -B\v; - cache.B = B; -end - -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_broyden.m b/private/Direction_broyden.m deleted file mode 100644 index d725ed1..0000000 --- a/private/Direction_broyden.m 
+++ /dev/null @@ -1,40 +0,0 @@ -% Broyden method - -function [dir, tau0, cache] = Direction_broyden(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -v; - cache.H = eye(prod(prob.n)); -else - H = cache.H; - sts = sk'*sk; - switch opt.modBroyden - case 3 % absolute value of determinant (guarantees nonsingularity) - prev_v = cache.prev_v; - prev_tau = norm(sk)/norm(cache.prev_dir); - lam = sk'*(H*yk)/sts; - if abs(lam) < opt.thetaBar - theta = (1-sign0(lam)*opt.thetaBar)/(1-lam); - yk = theta*yk - (1-theta)*prev_tau*prev_v; - end - otherwise - error('not implemented'); - end - Hy = H*yk; - H = H + (sk-Hy)*(sk'*H)/(sk'*Hy); - dir = -H*v; - cache.H = H; -end - -cache.prev_v = v; -cache.prev_dir = dir; -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_cgdesc.m b/private/Direction_cgdesc.m deleted file mode 100644 index 4780683..0000000 --- a/private/Direction_cgdesc.m +++ /dev/null @@ -1,30 +0,0 @@ -% CG-descent - -function [dir, cache] = Direction_cgdesc(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -cache_current.gradFBE; % Initially use steepest descent direction -else - yy = cache_current.gradFBE-cache_previous.gradFBE; - dy = dir'*yy; - lambda = 1; %Hager-Zhang proposed lambda = 2 but Kou, Dai found that lambda = 1 is more efficient - % lambda = 2-(dir'*yy)/((dir'*dir)*(yy'*yy)); - beta = ((yy-lambda*dir*(yy'*yy)/dy)'*cache_current.gradFBE)/dy; - etak = -1/(norm(dir)*min(0.01,norm(cache_current.gradFBE))); - beta = max(beta,etak); - dir = -cache_current.gradFBE + beta*dir; - if dir'*cache_current.gradFBE >= 0 % restart if not descent direction - dir = -cache_current.gradFBE; - cntSkip = cntSkip+1; - end -end - -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_cgdy.m b/private/Direction_cgdy.m deleted file mode 100644 index 132572c..0000000 --- 
a/private/Direction_cgdy.m +++ /dev/null @@ -1,24 +0,0 @@ -% CG method of Dai, Yuan - -function [dir, tau0, cache] = Direction_cgdy(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); -v = v(:); - -if it == 1 || restart - dir = -v; % Initially use steepest descent direction - tau0 = 1.0/norm(dir, inf); -else - betaDY = (v'*v)/(sk'*yk); - dir = -v + betaDY*sk; - if dir'*v >= 0 % restart if not descent direction - dir = -v; - tau0 = 1.0/norm(dir, inf); - cache.cntSkip = cache.cntSkip+1; - else - tau0 = (sk'*sk)/(sk'*yk); - end -end - -end diff --git a/private/Direction_cgdyhs.m b/private/Direction_cgdyhs.m deleted file mode 100644 index 9ab3d3a..0000000 --- a/private/Direction_cgdyhs.m +++ /dev/null @@ -1,26 +0,0 @@ -% CG, hybrid method Dai-Yuan/Hestenes-Stiefel - -function [dir, tau0, cache] = Direction_cgdyhs(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); -v = v(:); - -if it == 1 || restart - dir = -v; % Initially use steepest descent direction - tau0 = 1.0/norm(dir, inf); -else - betaDY = (v'*v)/(sk'*yk); - betaHS = (v'*yk)/(sk'*yk); - beta = max(0.0,min(betaHS,betaDY)); - dir = -v + beta*sk; - if dir'*v >= 0 % restart if not descent direction - dir = -v; - tau0 = 1.0/norm(dir, inf); - cache.cntSkip = cache.cntSkip+1; - else - tau0 = (sk'*sk)/(sk'*yk); - end -end - -end diff --git a/private/Direction_cgprp.m b/private/Direction_cgprp.m deleted file mode 100644 index bcd1a4c..0000000 --- a/private/Direction_cgprp.m +++ /dev/null @@ -1,27 +0,0 @@ -% CG method of Polak-Ribiere-Polyak - -function [dir, tau0, cache] = Direction_cgprp(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); -v = v(:); - -if it == 1 || restart - dir = -v; % Initially use steepest descent direction - tau0 = 1.0/norm(dir, inf); -else - beta = max((v'*yk)/cache.sqnorm_prev_v,0); - dir = -v + beta*cache.prev_dir; - if dir'*v >= 0 % restart if not descent direction - dir = -v; - tau0 = 1.0/norm(dir, inf); - cache.cntSkip = cache.cntSkip+1; 
- else - tau0 = (sk'*sk)/(sk'*yk); - end -end - -cache.sqnorm_prev_v = norm(v)^2; -cache.prev_dir = dir; - -end diff --git a/private/Direction_lbfgs.m b/private/Direction_lbfgs.m deleted file mode 100644 index 7027ef9..0000000 --- a/private/Direction_lbfgs.m +++ /dev/null @@ -1,39 +0,0 @@ -% L-BFGS - -function [dir, tau0, cache] = Direction_lbfgs(prob, opt, it, restart, sk, yk, v, cache) - -sk = full(sk(:)); -yk = full(yk(:)); - -[m, n] = size(v); -v = full(v(:)); - -if it == 1 || restart - dir = -v; % use steepest descent direction initially - cache.LBFGS_col = 0; % last column of Sk, Yk that was filled in - cache.LBFGS_mem = 0; % current memory of the method -else - YSk = yk'*sk; - if YSk > 0.0 - cache.LBFGS_col = 1+mod(cache.LBFGS_col, opt.memory); - cache.LBFGS_mem = min(cache.LBFGS_mem+1, opt.memory); - cache.S(:,cache.LBFGS_col) = sk; - cache.Y(:,cache.LBFGS_col) = yk; - cache.YS(cache.LBFGS_col) = YSk; - else - cache.cntSkip = cache.cntSkip+1; - end - if cache.LBFGS_mem > 0 - H = cache.YS(cache.LBFGS_col)/... - (cache.Y(:,cache.LBFGS_col)'*cache.Y(:,cache.LBFGS_col)); - dir = lbfgs(cache.S, cache.Y, cache.YS, H, ... 
- -v, int32(cache.LBFGS_col), int32(cache.LBFGS_mem)); - else - dir = -v; - end -end - -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_lbroyden.m b/private/Direction_lbroyden.m deleted file mode 100644 index 72e3956..0000000 --- a/private/Direction_lbroyden.m +++ /dev/null @@ -1,57 +0,0 @@ -% limited memory Broyden method - -function [dir, tau0, cache] = Direction_lbroyden(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -v; % use steepest descent direction initially - cache.S = []; % stores vectors sk - cache.Y = []; % stores vectors yk - cache.W = []; % stores columns of H0*Y-S - cache.StY = []; % stores inner products - cache.M = []; % these two are m-by-m where m is the memory of the method - cache.LBroyden_mem = 0; -else - % damping - if opt.modBroyden == 2 % enforces positive curvature along sk - sig = 0.1; - prev_v = cache.prev_v; - prev_tau = cache.prev_tau; - sty = sk'*yk; - stv = sk'*prev_v; - if sty < sig*prev_tau*abs(stv) - theta = (1+sign(stv)*sig)*prev_tau*stv/(prev_tau*stv + sty); - yk = theta*yk - (1-theta)*prev_tau*prev_v; - end - end - delta = 1; % diagonal of H0 - wk = delta*yk - sk; - if cache.LBroyden_mem == opt.memory, idx0 = 2; - else idx0 = 1; cache.LBroyden_mem = cache.LBroyden_mem+1; end - S0 = cache.S(:,idx0:end); - Y0 = cache.Y(:,idx0:end); - W0 = cache.W(:,idx0:end); - StY0 = cache.StY(idx0:end,idx0:end); - M0 = cache.M(idx0:end,idx0:end); - % update matrices S, Y, W, StY, M - cache.S = [S0, sk]; - cache.Y = [Y0, yk]; - cache.W = [W0, wk]; - if isempty(Y0), cache.StY = sk'*yk; - else cache.StY = [[StY0; sk'*Y0], cache.S'*yk]; end - if isempty(S0), cache.M = 0; - else cache.M = [[M0; S0(:,end)'*S0], zeros(cache.LBroyden_mem, 1)]; end - K = delta * cache.StY - cache.M; - % compute direction - dir = delta*(cache.W * (K\(cache.S'*v)) - v); -end - -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git 
a/private/Direction_rbroyden.m b/private/Direction_rbroyden.m deleted file mode 100644 index 0d5c691..0000000 --- a/private/Direction_rbroyden.m +++ /dev/null @@ -1,92 +0,0 @@ -% DIRECTION_RBROYDEN computes search directions according to the restarted -% modified broyden method. -% -% Parameters: -% -% prob: ForBES problem object (class ProblemComposite) -% opt: ForBES options object (struct) -% it: iteration number (1-based) -% restart: flag indicating that the method should be restarted -% sk, yk: pair used to perform the update -% v: vector to be multiplied by the Jacobian approximation (e.g., current residual) -% cache: object containint the method's memory (struct) -% -% Return values: -% -% dir: the computed direction -% tau0: the initial stepsize to be tried -% cache: the updated method's memory -% - -function [dir, tau0, cache] = Direction_rbroyden(prob, opt, it, restart, sk, yk, v, cache) - -sk = sk(:); -yk = yk(:); - -[m, n] = size(v); -v = v(:); - -if it == 1 || restart - dir = -v; - cache.S = []; - cache.HY = []; - cache.shy = []; -else - yts = yk'*sk; - yty = yk'*yk; - if opt.initialScaling - h = yts/yty; - else - h = 1; - end - dir = -h*v; - hy = h*yk; - for i=1:size(cache.S, 2) - hy = hy + ((hy'*cache.S(:,i))/cache.shy(i)) * (cache.S(:,i)-cache.HY(:,i)); - dir = dir + ((dir'*cache.S(:,i))/cache.shy(i)) * (cache.S(:,i)-cache.HY(:,i)); - end - u = opt.metric(sk); - ni = sk'*u; - shy = hy'*u; - % damping - switch opt.modBroyden - case 1 % enforces positive curvature along sk - % compute theta_{k-1}; if not 1 then update HYk = H_{k-1}\tilde y_{k-1} - prev_v = cache.prev_v; - prev_tau = norm(sk)/norm(cache.prev_dir); - delta = -prev_tau*(sk'*prev_v); % delta = \delta_{k-1} = = -tau* (Rw^k = Rxold) - if yts < opt.deltaCurvature*abs(delta) - theta = (1-sign0(delta)*opt.deltaCurvature)*delta/(delta-yts); - hy = (1-theta)*sk + theta*hy; - shy = (1-theta)*sts + theta*shy; - end - case 3 % nonsingularity - % compute theta_{k-1}; if not 1 then update HYk = 
H_{k-1}\tilde y_{k-1} - gam = shy/ni; - if abs(gam) < opt.thetaBar - theta = (1-sign0(gam)*opt.thetaBar)/(1-gam); - hy = theta*(sk-hy); % now hy is s - H\tilde y - shy = (1-theta)*ni + theta*shy; - end - otherwise - error('not implemented'); - end - dir = dir + ((dir'*u)/shy) * hy; - % update buffer - if size(cache.S,2) < opt.memory - cache.S = [cache.S, u]; - cache.HY = [cache.HY, hy]; - cache.shy = [cache.shy, shy]; - else - cache.S = []; - cache.HY = []; - cache.shy = []; - end -end - -cache.prev_v = v; -cache.prev_dir = dir; -tau0 = 1.0; -dir = reshape(dir, m, n); - -end diff --git a/private/Direction_sd.m b/private/Direction_sd.m deleted file mode 100644 index 7761aa0..0000000 --- a/private/Direction_sd.m +++ /dev/null @@ -1,10 +0,0 @@ -% Steepest descent - -function [dir, tau0, cache] = Direction_sd(prob, opt, it, restart, sk, yk, v, cache) - -v = v(:); - -dir = -v; -tau0 = 1.0/norm(dir, inf); - -end diff --git a/private/LineSearch_Armijo.m b/private/LineSearch_Armijo.m deleted file mode 100755 index 9c2fc1d..0000000 --- a/private/LineSearch_Armijo.m +++ /dev/null @@ -1,77 +0,0 @@ -function [t, cachet, cachet1, ops, exitflag] = LineSearch_Armijo(cache, dir, slope, t0, lsopt, ref) -%ARMIJOLS - computes a steplength t > 0 so that it satisfies the Armijo condition -% -% f(t) <= f(0) + delta*f'(0) -% -% exitflag = -1: gam is not small enough -% exitflag = 0: acceptable steplength was found -% exitflag = 1: maximum number of backtracking steps exceeded -% exitflag = 2: no further progress can be made - - % precompute stuff for the line search - [cache, ops] = CacheLineSearch(cache, dir); - - cachet1 = []; - - gam = cache.gam; - - arm_hi = lsopt.delta*slope; - t = t0; - exitflag = 1; - if nargin >= 6 - f0 = ref; - else - f0 = cache.FBE; - end - for i = 1:lsopt.nLS - [cachet, ops1] = LineFBE(cache, t, 1); - ops = OpsSum(ops, ops1); - ft = cachet.FBE; - if ft <= f0 + t*arm_hi - exitflag = 0; - break; - end - if i == 1 %quadratic interpolation - tn = 
ArmijoQuadInterp(f0, slope, t, ft); - else %cubic interpolation - tn = ArmijoCubInterp(f0, slope, told, ftold, t, ft); - end - if tn <= 0 - tn = 0.5*t; - end - told = t; - ftold = ft; - t = tn; - if t <= lsopt.progTol - exitflag = 2; - break - end - end - if exitflag == 0 && lsopt.testGamma - [flagGamma, cachet, cachet1, ops1] = CheckGamma(cachet, gam, lsopt.beta); - ops = OpsSum(ops, ops1); - exitflag = flagGamma-1; % because CheckGamma returns 1 (good gamma) or 0 (bad gamma) - end - -end - -function t = ArmijoQuadInterp(f0,df0,t,ft) - % Minimizer of interpolant belongs to [0,t1] - tdf0 = t*df0; - q = ft-f0-tdf0; - if q > 0%quadratic is strongly convex - t = -(tdf0*t)/(2*q); - else - t = -1; - end -end - -function t = ArmijoCubInterp(f,df,t0,f0,t1,f1) - % Minimizer of interpolant belongs to [0,t1] - t02 = t0^2; - t12 = t1^2; - ab = 1/(t02*t12*(t1-t0))*[t02 -t12;-t0^3 t1^3]*[f1-f-df*t1;f0-f-df*t0]; - a = ab(1); - b = ab(2); - t = (-b+sqrt(b^2-3*a*df))/(3*a); -end diff --git a/private/LineSearch_Backtracking.m b/private/LineSearch_Backtracking.m deleted file mode 100755 index 2c2348d..0000000 --- a/private/LineSearch_Backtracking.m +++ /dev/null @@ -1,33 +0,0 @@ -function [t, cachet, cachet1, lsopt, exitflag] = LineSearch_Backtracking... 
- (cache, direction, slope, t0, lsopt, adaptive, it, restart, ref, lin, const) - - if nargin < 9, ref = cache.Get_FBE(); end - if nargin < 10, lin = 0.0; end - if nargin < 11, const = 0.0; end - - cache.Set_Directions(direction); - - cachet1 = []; - - t = t0; - exitflag = 1; - - for i = 1:lsopt.nLS - cachet = cache.Get_CacheLine(t, 1); - ft = cachet.Get_FBE(); - if ft <= ref + t*lin + 1e-14*abs(ref) + const - exitflag = 0; - break; - end - t = 0.5*t; - if t <= lsopt.progTol - exitflag = 2; - break - end - end - - if exitflag == 0 && adaptive - [flag, cachet1] = cachet.Backtrack_Gamma(lsopt.beta); - exitflag = -1*flag; - end -end diff --git a/private/LineSearch_BacktrackingArmijo.m b/private/LineSearch_BacktrackingArmijo.m deleted file mode 100644 index 3870c31..0000000 --- a/private/LineSearch_BacktrackingArmijo.m +++ /dev/null @@ -1,7 +0,0 @@ -function [t, cachet, cachet1, lsopt, exitflag] = LineSearch_BacktrackingArmijo(cache, dir, slope, t0, lsopt, adaptive, it, restart, varargin) - -ref = cache.FBE; % f(0) -lin = lsopt.delta*slope; % delta f'(0) -[t, cachet, cachet1, lsopt, exitflag] = LineSearch_Backtracking(cache, dir, slope, t0, lsopt, adaptive, it, restart, ref, lin); - -end diff --git a/private/LineSearch_BacktrackingNM.m b/private/LineSearch_BacktrackingNM.m deleted file mode 100644 index 970e1c1..0000000 --- a/private/LineSearch_BacktrackingNM.m +++ /dev/null @@ -1,15 +0,0 @@ -function [t, cachet, cachet1, lsopt, exitflag] = LineSearch_BacktrackingNM... 
- (cache, dir, slope, t0, lsopt, adaptive, it, restart, ref, lin, const) - -if it == 1 || restart - lsopt.Q = 1; - lsopt.C = ref; -else - newQ = lsopt.eta*lsopt.Q+1; - lsopt.C = (lsopt.eta*lsopt.Q*lsopt.C + ref)/newQ; - lsopt.Q = newQ; -end - -[t, cachet, cachet1, lsopt, exitflag] = LineSearch_Backtracking(cache, dir, slope, t0, lsopt, adaptive, it, restart, lsopt.C, lin, const); - -end diff --git a/private/LineSearch_Fletcher.m b/private/LineSearch_Fletcher.m deleted file mode 100755 index eca64de..0000000 --- a/private/LineSearch_Fletcher.m +++ /dev/null @@ -1,206 +0,0 @@ -function [cachet, t, cnt, exitflag ] = LineSearch_Fletcher(prob,gam,cache,df0,lsopt) -%FletcherLS - computes a steplength t > 0 so that it satisfies the strong Wolfe conditions -% -% f(t) <= f(0) + delta*f'(0) -% abs(f'(t)) <= -sigma*f'(0). -% -% exitflag = 0: acceptable steplength was found -% exitflag = 1: steplength t for which f(t) < fminimum was found -% exitflag = -1: maximum number of bracketing or sectioning iterations reached -% exitflag = -2: no further progress can be made -% -% Algorithm is described in -% -% R. Fletcher, Practical Methods of Optimization, John Wiley & Sons, 1987, -% second edition, section 2.6. 
- - lsopt.wolfe_hi = lsopt.delta*df0; - lsopt.wolfe_lo = lsopt.sigma*df0; - f0 = cache.FBE; - t0 = lsopt.tau0; - % Q, A, C, f2, proxg - cnt = [0, 0, 0, 0, 0]; - - % Find a bracket of acceptable points - [cachet,a,b,fa,dfa,fb,dfb,t,exitflag,cnt1] = FletcherBracket(cache,prob,gam,f0,df0,t0,lsopt); - cnt = cnt+cnt1; - - if exitflag == 2 - % BracketingPhase found a bracket containing acceptable points; now find acceptable point - % within bracket - [cachet,t,exitflag,cnt1] = FletcherSection(cache,prob,gam,f0,a,fa,dfa,b,fb,dfb,lsopt); - cnt = cnt+cnt1; - end -end - -%----------------------------------------------------------------------------------- -function [cachet,a,b,fa,dfa,fb,dfb,t,exitflag,cnt] = FletcherBracket(cache,prob,gam,f0,df0,t0,lsopt) -% -% bracketingPhase finds a bracket [a,b] that contains acceptable points; a bracket -% is the same as a closed interval, except that a > b is allowed. -% -% The outputs fa and dfa are the values of the function and the derivative -% evaluated at the bracket endpoint 'a'. Similar notation applies to the endpoint -% 'b'. The possible values of exitflag are like in LINESEARCH, with the additional -% value exitflag = 2, which indicates that a bracket containing acceptable points -% was found. 
- - % Q, A, C, f2, proxg - cnt = [0, 0, 0, 0, 0]; - tau1 = 9; % factor to expand the current bracket - a = []; b = []; fa = []; dfa = []; fb = []; dfb = []; - ft = f0; dft = df0; - - % Set maximum value of t (determined by fminimum) - tmax = (lsopt.fmin - f0)/(lsopt.wolfe_hi); - told = 0; - - % First trial t is user-supplied - t = t0; - for nbracket = 1:lsopt.nbracket - fold = ft; dfold = dft; - [cachet, cnt1] = LineFBE(prob, gam, t, cache, 3); - cnt = cnt+cnt1; - ft = cachet.FBE; dft = cachet.dFBE; - - % Terminate if f < fminimum - if ft <= lsopt.fmin - exitflag = 1; - return - end - - % Bracket located - case 1 - if ft > f0 + t*lsopt.wolfe_hi || ft >= fold - a = told; fa = fold; dfa = dfold; - b = t; fb = ft; dfb = dft; - exitflag = 2; - return - end - - % Acceptable steplength found; no need to call sectioning phase - if abs(dft) <= -lsopt.wolfe_lo - exitflag = 0; - return - end - - % Bracket located - case 2 - if dft >= 0 - a = t; fa = ft; dfa = dft; - b = told; fb = fold; dfb = dfold; - exitflag = 2; - return - end - - % Update t - if 2*t - told < tmax % if t + (t - told) < tmax - lb = 2*t-told; % lb = t + (t - told) >= tmax - ub = min(tmax,t+tau1*(t-told)); - tnew = FletcherCubInterp(told,fold,dfold,t,ft,dft); - tnew = min(max(tnew,lb),ub); - told = t; - t = tnew; - else - t = tmax; - end - end - - % We reach this point if and only if maxnf was reached - exitflag = -1; -end - -%----------------------------------------------------------------------------------- -function [cachet,t,exitflag,cnt] = FletcherSection(cache,prob,gam,f0,a,fa,dfa,b,fb,dfb,lsopt) -% -% sectioningPhase finds an acceptable point t within a given bracket [a,b] -% containing acceptable points. Notice that funcCount counts the total number of -% function evaluations including those of the bracketing phase. 
- - % Q, A, C, f2, proxg - cnt = [0, 0, 0, 0, 0]; - tau2 = min(0.1, lsopt.sigma); - tau3 = 0.5; - - t = []; - for nsection = 1:lsopt.nsection - - % Pick t in reduced interval - lb = a + tau2*(b - a); ub = b - tau3*(b - a); - % Find global minimizer in [lb, ub] of 3rd-degree polynomial that interpolates - % f() and f'() at "a" and at "b". - t = FletcherCubInterp(a,fa,dfa,b,fb,dfb); - t = min(max(t,lb),ub); - - [cachet, cnt1] = LineFBE(prob, gam, t, cache, 3); - cnt = cnt+cnt1; - ft = cachet.FBE; dft = cachet.dFBE; - - if (t - a)*dfa >= lsopt.progTol || abs(b - a)*norm(cache.dir) < lsopt.progTol - exitflag = -2; % No further progress can be made - return - end - - % Update bracket - aold = a; faold = fa; dfaold = dfa; - bold = b; fbold = fb; dfbold = dfb; - if ft > f0 + t*lsopt.wolfe_hi || ft >= fa - a = aold; fa = faold; dfa = dfaold; - b = t; fb = ft; dfb = dft; - else - if abs(dft) <= -lsopt.wolfe_lo - exitflag = 0; % Acceptable point found - return - end - a = t; fa = ft; dfa = dft; - if (b - a)*dft >= 0 - b = aold; fb = faold; dfb = dfaold; - else - b = bold; fb = fbold; dfb = dfbold; - end - end - end % of while - - % We reach this point if and only if maxnf was reached - exitflag = -1; -end - -function t = FletcherCubInterp(t1,f1,df1,t2,f2,df2) - % t1, t2 might not be sorted - delta = t2 - t1 ; - if delta == 0 - t = t1; - else - d1 = df1 + df2 - 3*(f2-f1)/delta; - d2 = d1^2 - df1*df2 ; - if d2 < 0 % /* complex roots, use secant method */ - if ( abs(df1) < abs (df2) ) - t = t1 - (t1 - t2)*(df1/(df1-df2)) ; - elseif ( df1 ~= df1 ) - t = t2 - (t1 - t2)*(df2/(df1-df2)) ; - else - t = - 1 ; - end - else - % first way: from Hager-Zhang code - % d2 = sqrt(d2)*sign(delta); - % v1 = df1 + d1 - d2 ; - % v2 = df2 + d1 + d2 ; - % if ( (v1 == 0) && (v2 == 0) ) - % t = -1; - % elseif ( abs (v1) >= abs (v2) ) - % t = t1 + delta*df1/v1 ; - % else - % t = t2 - delta*df2/v2 ; - % end - % - % second way: from Bonnans, Lemarechal - d2 = sqrt(d2)*sign(delta); - v1 = df2 + d2 - d1; 
- v2 = df2 - df1 + 2*d2; - if ( (v1 == 0) && (v2 == 0) ) - t = -1; - else - t = t2 - delta*(v1/v2); - end - end - end -end diff --git a/private/LineSearch_HagerZhang.m b/private/LineSearch_HagerZhang.m deleted file mode 100755 index ca71c8f..0000000 --- a/private/LineSearch_HagerZhang.m +++ /dev/null @@ -1,468 +0,0 @@ -function [alpha, cachet, cachet1, ops, lsopt, info] = LineSearch_HagerZhang(cache, dir, slope, t0, lsopt, varargin) -% Hager Zhang line search based on CG-DESCENT Version 6.7 (April 7, 2014) -% Approximate Wolfe line search routine -% info: -% 0 (Wolfe or approximate Wolfe conditions satisfied) -% 3 (slope always negative in line search) -% 4 (number of line search iterations exceed nline) -% 6 (excessive updating of eps) -% 7 (Wolfe conditions never satisfied) -% ========================================================================= */ - - % precompute stuff for the line search - [cache, ops] = Cache_LineSearch(cache, dir); - - cachet1 = []; - - AWolfe = lsopt.AWolfe; - alpha = t0; - f0 = cache.FBE; - - if lsopt.PertRule - epsilon = lsopt.eps*abs(f0); - else - epsilon = lsopt.eps; - end - - lsopt.wolfe_hi = lsopt.delta*slope; - lsopt.wolfe_lo = lsopt.sigma*slope; - lsopt.awolfe_hi = (2*lsopt.delta - 1)*slope; - lsopt.fpert = f0 + epsilon; - - % evaluate function or gradient at alpha (starting guess) - if ( lsopt.QuadOK ) - [cachet, ops1] = LineFBE(cache, alpha, 3); - ops = Ops_Sum(ops, ops1); - f = cachet.FBE; df = cachet.dFBE; - fb = f; - if ( ~AWolfe ), fb = fb - alpha*lsopt.wolfe_hi ;end - qb = true ; % function value at b known - else - [cachet, ops1] = LineFBE(cache, alpha, 2); - ops = Ops_Sum(ops, ops1); - df = cachet.dFBE; - qb = false ; - end - b = alpha ; - - if ( AWolfe ) - db = df ; - d0 = slope; - da = slope; - else - db = df - lsopt.wolfe_hi ; - d0 = slope - lsopt.wolfe_hi ; - da = d0; - end - a = 0 ; - a1 = 0 ; - d1 = d0 ; - fa = f0 ; - - % if a quadratic interpolation step performed, check Wolfe conditions */ - if ( (lsopt.QuadOK) 
&& (f <= f0) ) - if ( HagerZhangTestWolfe (alpha, f, df, f0,lsopt) ), info = 0; return ;end - end - - % Find initial interval [a,b] such that - % da <= 0, db >= 0, fa <= fpert = [(f0 + eps*abs(f0)) or (f0 + eps)] */ - rho = lsopt.rho ; - ngrow = 1 ; - while ( db < 0 ) - if ( ~qb ) - [cachet, ops1] = LineFBE(cache, alpha, 1, cachet); - ops = Ops_Sum(ops, ops1); - f = cachet.FBE; - if ( AWolfe ) - fb = f ; - else - fb = f - b*lsopt.wolfe_hi ; - end - qb = true ; - end - if ( fb > lsopt.fpert ) % contract interval [a, b] - [a,fa,da,b,fb,db,alpha,status,lsopt,cachet,cnt1] = HagerZhangUpdate (a, fa, da, b, fb, db,prob,gam,cache,lsopt,f0) ; - cnt = cnt+cnt1; - if ( status == 0 ), info = 0; return ;end % /* Wolfe conditions hold */ - if ( status == -2 ), break,end ; %/* db >= 0 */ - if ( lsopt.neps > 0 ), info = 6;return,end - end - - % expansion phase - ngrow = ngrow +1 ; - if ( ngrow > lsopt.nexpand ), info = 3; return,end - % update interval (a replaced by b) */ - a = b ; - fa = fb ; - da = db ; - % store old values of a and corresponding derivative - d2 = d1 ; - d1 = da ; - a2 = a1 ; - a1 = a ; - - bmin = rho*b ; - if ( (ngrow == 2) || (ngrow == 3) || (ngrow == 6) ) - if ( d1 > d2 ) - if ( ngrow == 2 ) - b = a1 - (a1-a2)*(d1/(d1-d2)) ; - else - if ( (d1-d2)/(a1-a2) >= (d2-d0)/a2 ) - % convex derivative, secant overestimates minimizer - b = a1 - (a1-a2)*(d1/(d1-d2)) ; - else - % concave derivative, secant underestimates minimizer - b = a1 - lsopt.SecantAmp*(a1-a2)*(d1/(d1-d2)) ; - end - end - % safeguard growth - b = min (b, lsopt.ExpandSafe*a1) ; - else - rho = rho*lsopt.RhoGrow ; - end - else - rho = rho*lsopt.RhoGrow ; - end - b = max (bmin, b) ; - alpha = b ; - [cachet, ops1] = LineFBE(cache, alpha, 2); - ops = Ops_Sum(ops, ops1); - df = cachet.dFBE; - b = alpha ; - qb = false ; - if ( AWolfe ) - db = df ; - else - db = df - lsopt.wolfe_hi ; - end - - end - - % /* we now have fa <= fpert, da >= 0, db <= 0 */ - toggle = 0 ; - width = b - a ; - qb0 = false ; - for 
iter = 0:lsopt.nsecant-1 - % /* determine the next iterate */ - if ( (toggle == 0) || ((toggle == 2) && ((b-a) <= width)) ) - lsopt.QuadOK = true ; - if ( lsopt.UseCubic && qb ) - alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ; - if ( alpha < 0 ) %/* use secant method */ - if ( -da < db ) - alpha = a - (a-b)*(da/(da-db)) ; - elseif ( da ~= db ) - alpha = b - (a-b)*(db/(da-db)) ; - else - alpha = -1. ; - end - end - else - if ( -da < db ) - alpha = a - (a-b)*(da/(da-db)) ; - elseif ( da ~= db ) - alpha = b - (a-b)*(db/(da-db)) ; - else - alpha = -1. ; - end - end - width = lsopt.gamma*(b - a) ; - - elseif ( toggle == 1 ) %/* iteration based on smallest value*/ - lsopt.QuadOK = true ; - if ( lsopt.UseCubic ) - if ( alpha == a ) %/* a is most recent iterate */ - alpha = HagerZhangCubInterp (a0, fa0, da0, a, fa, da) ; - elseif ( qb0 ) %/* b is most recent iterate */ - alpha = HagerZhangCubInterp (b, fb, db, b0, fb0, db0) ; - else - alpha = -1. ; - end - % /* if alpha no good, use cubic between a and b */ - if ( (alpha <= a) || (alpha >= b) ) - if ( qb ) - alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ; - else - alpha = -1. ; - end - end - - % /* if alpha still no good, use secant method */ - if ( alpha < 0 ) - if ( -da < db ) - alpha = a - (a-b)*(da/(da-db)) ; - elseif ( da ~= db ) - alpha = b - (a-b)*(db/(da-db)) ; - else - alpha = -1. ; - end - end - else %/* ( use secant ) */ - if ( (alpha == a) && (da > da0) ) %/* use a0 if possible */ - alpha = a - (a-a0)*(da/(da-da0)) ; - elseif ( db < db0 ) %/* use b0 if possible */ - alpha = b - (b-b0)*(db/(db-db0)) ; - else %/* secant based on a and b */ - if ( -da < db ) - alpha = a - (a-b)*(da/(da-db)) ; - elseif ( da ~= db ) - alpha = b - (a-b)*(db/(da-db)) ; - else - alpha = -1. ; - end - end - if ( (alpha <= a) || (alpha >= b) ) - if ( -da < db ) - alpha = a - (a-b)*(da/(da-db)) ; - elseif ( da ~= db ) - alpha = b - (a-b)*(db/(da-db)) ; - else - alpha = -1. 
; - end - end - end - else - alpha = .5*(a+b) ; %/* use bisection if b-a decays slowly */ - lsopt.QuadOK = false ; - end - if ( (alpha <= a) || (alpha >= b) ) - alpha = .5*(a+b) ; - if ( (alpha == a) || (alpha == b) ), - info = 7; - return ; - end - lsopt.QuadOK = false ; %/* bisection was used */ - end - - if ( toggle == 0 ) %/* save values for next iteration */ - a0 = a ; - b0 = b ; - da0 = da ; - db0 = db ; - fa0 = fa ; - if ( qb ) - fb0 = fb ; - qb0 = true ; - end - end - - toggle = toggle + 1 ; - if ( toggle > 2 ), toggle = 0 ;end - [cachet, ops1] = LineFBE(cache, alpha, 3); - ops = Ops_Sum(ops, ops1); - f = cachet.FBE;df = cachet.dFBE; - if ( lsopt.QuadOK ) - if ( HagerZhangTestWolfe (alpha, f, df, f0,lsopt) ) - info = 0; - return - end - end - - if ( ~AWolfe ) - f = f - alpha*lsopt.wolfe_hi ; - df = df - lsopt.wolfe_hi ; - end - - if ( df >= 0 ) - b = alpha ; - fb = f ; - db = df ; - qb = true ; - elseif ( f <= lsopt.fpert ) - a = alpha ; - da = df ; - fa = f ; - else - B = b ; - if ( qb ), fB = fb ;end - dB = db ; - b = alpha ; - fb = f ; - db = df ; - % /* contract interval [a, alpha] */ - [a,fa,da,b,fb,db,alpha,status,lsopt,cachet,ops1] = HagerZhangUpdate (a,fa,da,b,fb,db,cache,lsopt,f0) ; - ops = Ops_Sum(ops, ops1); - if ( status == 0 ), info = 0; return; end - if ( status == -1 ) %/* eps reduced, use [a, b] = [alpha, b] */ - if ( lsopt.neps > 5 ), info = 6; return; end - a = b ; - fa = fb ; - da = db ; - b = B ; - if ( qb ), fb = fB ;end - db = dB ; - else - qb = true ; - end - end - end - info = 4; -end - -% /* ========================================================================= -% ==== update ======================================================== -% ========================================================================= -% The input for this routine is an interval [a, b] with the property that -% fa <= fpert, da >= 0, db >= 0, and fb >= fpert. 
The returned status is -% -% 11 function or derivative not defined -% 0 if the Wolfe conditions are satisfied -% -1 if a new value for eps is generated with the property that for the -% corresponding fpert, we have fb <= fpert -% -2 if a subinterval, also denoted [a, b], is generated with the property -% that fa <= fpert, da >= 0, and db <= 0 -% -% NOTE: The input arguments are unchanged when status = -1 -% ========================================================================= */ -function [a,fa,da,b,fb,db,alpha,info,lsopt,cachet,ops] = HagerZhangUpdate(a,fa,da,b,fb,db,cache,lsopt,f0) - ops = Ops_Init(); - AWolfe = lsopt.AWolfe ; - f1 = fb ; - toggle = 0 ; - width = 0 ; - for iter=0:lsopt.ncontract - if ( (toggle == 0) || ((toggle == 2) && ((b-a) <= width)) ) - % /* cubic based on bracketing interval */ - alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ; - toggle = 0 ; - width = lsopt.gamma*(b-a) ; - if ( iter ), - lsopt.QuadOK = true ; - end %/* at least 2 cubic iterations */ - elseif ( toggle == 1 ) - lsopt.QuadOK = true ; - % /* cubic based on most recent iterate and smallest value */ - if ( old < a ) %/* a is most recent iterate */ - alpha = HagerZhangCubInterp (a, fa, da, old, fold, dold) ; - else % /* b is most recent iterate */ - alpha = HagerZhangCubInterp (a, fa, da, b, fb, db) ; - end - else - alpha = .5*(a+b) ; %/* use bisection if b-a decays slowly */ - lsopt.QuadOK = false ; - end - - if ( (alpha <= a) || (alpha >= b) ) - alpha = .5*(a+b) ; - lsopt.QuadOK = false ; %/* bisection was used */ - end - toggle = toggle + 1 ; - if ( toggle > 2 ) - toggle = 0 ; - end - [cachet, ops1] = LineFBE(cache, alpha, 3); - ops = Ops_Sum(ops, ops1); - f = cachet.FBE; df = cachet.dFBE; - - if ( lsopt.QuadOK ) - if ( HagerZhangTestWolfe (alpha, f, df, f0, lsopt) ), - info = 0; - return - end - end - - if ( ~AWolfe ) - f = f - alpha*lsopt.wolfe_hi ; - df = df - lsopt.wolfe_hi ; - end - if ( df >= 0 ) - a = alpha ; - fb = f ; - db = df ; - info = -2; - return - end - 
if ( f <= lsopt.fpert ) %/* update a using alpha */ - old = a ; - fold = fa ; - dold = da ; - a = alpha ; - fa = f ; - da = df ; - else %/* update b using alpha */ - old = b ; - fold = fb; - dold = db; - b = alpha ; - fb = f ; - db = df ; - end - - end - - %% This might need debugging - % see if the cost is small enough to change the PertRule - if ( abs (fb) <= lsopt.SmallCost ), - lsopt.PertRule = false ; - end - - % increase eps if slope is negative after Parm->nshrink iterations - if ( lsopt.PertRule ) - if ( f0 ~= 0) - lsopt.eps = lsopt.egrow*(f1-f0)/abs (f0) ; - lsopt.fpert = f0 + abs (f0)*lsopt.eps ; - else - lsopt.fpert = 2*f1 ; - end - else - lsopt.eps = lsopt.egrow*(f1-f0) ; - lsopt.fpert = f0 + lsopt.eps ; - end - lsopt.neps = lsopt.neps+1 ; - info = -1 ; -end - -function done = HagerZhangTestWolfe (alpha, f, df,f0,lsopt) - done = false; - if ( df >= lsopt.wolfe_lo ) - % c test original Wolfe conditions - if ( f-f0 <= alpha*lsopt.wolfe_hi ) - done = true; - % c test approximate Wolfe conditions - elseif ( lsopt.AWolfe ) - done = ( (f <= lsopt.fpert) & (df <= lsopt.awolfe_hi)); - end - end -end - -function t = HagerZhangCubInterp(t1,f1,df1,t2,f2,df2) - % t1, t2 might not be sorted - delta = t2 - t1 ; - if delta == 0 - t = t1; - else - d1 = df1 + df2 - 3*(f2-f1)/delta; - d2 = d1^2 - df1*df2 ; - if d2 < 0 % /* complex roots, use secant method */ - if ( abs(df1) < abs (df2) ) - t = t1 - (t1 - t2)*(df1/(df1-df2)) ; - elseif ( df1 ~= df1 ) - t = t2 - (t1 - t2)*(df2/(df1-df2)) ; - else - t = - 1 ; - end - else - % first way: from Hager-Zhang code - d2 = sqrt(d2)*sign(delta); - v1 = df1 + d1 - d2 ; - v2 = df2 + d1 + d2 ; - if ( (v1 == 0) && (v2 == 0) ) - t = -1; - elseif ( abs (v1) >= abs (v2) ) - t = t1 + delta*df1/v1 ; - else - t = t2 - delta*df2/v2 ; - end - % - % second way: from Bonnans, Lemarechal - % d2 = sqrt(d2)*sign(delta); - % v1 = df2 + d2 - d1; - % v2 = df2 - df1 + 2*d2; - % if ( (v1 == 0) && (v2 == 0) ) - % t = -1; - % else - % t = t2 - 
delta*(v1/v2); - % end - end - end -end diff --git a/private/LineSearch_HagerZhangNonmonotone.m b/private/LineSearch_HagerZhangNonmonotone.m deleted file mode 100755 index e48cd50..0000000 --- a/private/LineSearch_HagerZhangNonmonotone.m +++ /dev/null @@ -1,76 +0,0 @@ -function [t, cachet, cachet1, ops, exitflag] = LineSearch_HagerZhangNonmonotone(cache, dir, slope, t0, lsopt, ref) -%ARMIJOLS - computes a steplength t > 0 so that it satisfies the Armijo condition -% -% f(t) <= f(0) + delta*f'(0) -% -% exitflag = -1: gam is not small enough -% exitflag = 0: acceptable steplength was found -% exitflag = 1: maximum number of backtracking steps exceeded -% exitflag = 2: no further progress can be made - - % precompute stuff for the line search - [cache, ops] = CacheLineSearch(cache, dir); - - cachet1 = []; - - gam = cache.gam; - - arm_hi = lsopt.delta*slope; - t = t0; - exitflag = 1; - if nargin >= 5 - f0 = ref; - else - f0 = cache.FBE; - end - for i = 1:lsopt.nLS - [cachet, ops1] = LineFBE(cache, t, 1); - ops = OpsSum(ops, ops1); - ft = cachet.FBE; - if ft <= f0 + t*arm_hi - exitflag = 0; - break; - end - if i == 1 %quadratic interpolation - tn = ArmijoQuadInterp(f0, slope, t, ft); - else %cubic interpolation - tn = ArmijoCubInterp(f0, slope, told, ftold, t, ft); - end - if tn <= 0 - tn = 0.5*t; - end - told = t; - ftold = ft; - t = tn; - if t <= lsopt.progTol - exitflag = 2; - break - end - end - if exitflag == 0 && lsopt.testGamma - [flagGamma, cachet1, ops1] = CheckGamma(cachet, gam, beta); - ops = OpsSum(ops, ops1); - exitflag = flagGamma-1; % because CheckGamma returns 1 (good gamma) or 0 (bad gamma) - end -end - -function t = ArmijoQuadInterp(f0,df0,t,ft) - % Minimizer of interpolant belongs to [0,t1] - tdf0 = t*df0; - q = ft-f0-tdf0; - if q > 0%quadratic is strongly convex - t = -(tdf0*t)/(2*q); - else - t = -1; - end -end - -function t = ArmijoCubInterp(f,df,t0,f0,t1,f1) - % Minimizer of interpolant belongs to [0,t1] - t02 = t0^2; - t12 = t1^2; - ab = 
1/(t02*t12*(t1-t0))*[t02 -t12;-t0^3 t1^3]*[f1-f-df*t1;f0-f-df*t0]; - a = ab(1); - b = ab(2); - t = (-b+sqrt(b^2-3*a*df))/(3*a); -end diff --git a/private/LineSearch_Lemarechal.m b/private/LineSearch_Lemarechal.m deleted file mode 100755 index 8120f86..0000000 --- a/private/LineSearch_Lemarechal.m +++ /dev/null @@ -1,168 +0,0 @@ -function [t, cachet, cachet1, lsopt, exitflag] = LineSearch_Lemarechal(cache, dir, slope, t0, lsopt, adaptive, varargin) -%LEMARECHALLS - computes a steplength t > 0 so that it satisfies the (weak) Wolfe conditions -% -% f(t) <= f(0) + delta*f'(0) -% f'(t) >= sigma*f'(0). -% -% exitflag = -1: gam is not small enough -% exitflag = 0: acceptable steplength was found -% exitflag = 1: maximum number of bracketing or sectioning iterations reached -% exitflag = 2: no further progress can be made -% -% Algorithm is described in Figure 1 of -% -% C. Lemarechal, A view of line searches, in: Optimization and Optimal Control, Auslender, -% Oettli, Stoer Eds, Lecture Notes in Control and Information Sciences 30, -% Springer Verlag (1981) -% -% see also -% -% J.-B. Hiriart-Urruty and C. Lemarechal (1996). -% Convex Analysis and Minimization Algorithms, vol I. 
-% Springer Verlag, Heidelberg, Algorithm 3.3.1 (Wolfe's line-search) - - % precompute stuff for the line search - cache.Set_Directions(dir); - - cachet1 = []; - - gam = cache.gam; - - t = t0; - wolfe_hi = lsopt.delta*slope; - wolfe_lo = lsopt.sigma*slope; - a = 0; fa = cache.Get_FBE(); dfa = slope; - tprev = a; fprev = fa; dfprev = dfa; - b = inf; % upper bound - rho = lsopt.rho; - theta = lsopt.theta; - exitflag = 1; - testGammaFlag = 0; - for it = 1:lsopt.nbracket - cachet = cache.Get_CacheLine(t, 1); - if adaptive && testGammaFlag - [isGammaOK, cachet1] = cache.Check_Gamma(lsopt.beta); - exitflag = ~isGammaOK; - end - testGammaFlag = 0; - if cachet.FBE > cache.FBE + t*wolfe_hi - b = t; fb = cachet.FBE; - if lsopt.interp == 1 - tn = LemarechalQuadInterp(a,fa,dfa,b,fb); - % safeguard - tn = min(tn,b - theta*(b - a)); - tn = max(tn,a + theta*(b - a)); - elseif lsopt.interp == 2 - cachet = cache.Get_CacheLine(t, 3, cachet); - dfb = cachet.Get_Slope(); - tn = LemarechalCubInterp(a,fa,dfa,b,fb,dfb); - % safeguard - tn = min(tn,b - theta*(b - a)); - tn = max(tn,a + theta*(b - a)); - else - tn = 0.5*(a + b); - end - t = tn; - else - cachet = cache.Get_CacheLine(t, 2, cachet); - if cachet.dFBE < wolfe_lo - a = t; fa = cachet.FBE; dfa = cachet.dFBE; - if b == inf - % extrapolate - if lsopt.interp % we always have dfprev - tn = LemarechalCubInterp(tprev,fprev,dfprev,a,fa,dfa); - % safeguard - tn = max(tn,rho*tprev); - else - tn = rho*t; - end - testGammaFlag = 1; - else - % interpolate - if lsopt.interp == 1 - tn = LemarechalQuadInterp(a,fa,dfa,b,fb); - % safeguard - tn = min(tn,b - theta*(b - a)); - tn = max(tn,a + theta*(b - a)); - elseif lsopt.interp == 2 - tn = LemarechalCubInterp(a,fa,dfa,b,fb,dfb); - % safeguard - tn = min(tn,b - theta*(b - a)); - tn = max(tn,a + theta*(b - a)); - else - tn = 0.5*(a + b); - end - end - tprev = t;fprev = fa;dfprev = dfa; - t = tn; - else - exitflag = 0; - break; - end - end - - if (b-a) <= lsopt.progTol - exitflag = 2; - break; - end 
- end - - if exitflag == 0 && adaptive - [isGammaOK, cachet1] = cachet.Check_Gamma(lsopt.beta); - exitflag = ~isGammaOK; - end -end - -function t = LemarechalQuadInterp(t0,f0,df0,t1,f1) - % Minimizer of interpolant belongs to [0,t1] - q = f1-f0-t1*df0; - q = 2*(f1-f0-(t1-t0)*df0)/(t1-t0)^2; - if q > 0%quadratic is strongly convex - c2 = df0-t0*q; - t = -c2/q; - else - t = -1; - end -end - -function t = LemarechalCubInterp(t1,f1,df1,t2,f2,df2) - % t1, t2 might not be sorted - delta = t2 - t1 ; - if delta == 0 - t = t1; - else - d1 = df1 + df2 - 3*(f2-f1)/delta; - d2 = d1^2 - df1*df2 ; - if d2 < 0 % /* complex roots, use secant method */ - if ( abs(df1) < abs (df2) ) - t = t1 - (t1 - t2)*(df1/(df1-df2)) ; - elseif ( df1 ~= df1 ) - t = t2 - (t1 - t2)*(df2/(df1-df2)) ; - else - t = - 1 ; - end - else - % first way: from Hager-Zhang code - % d2 = sqrt(d2)*sign(delta); - % v1 = df1 + d1 - d2 ; - % v2 = df2 + d1 + d2 ; - % if ( (v1 == 0) && (v2 == 0) ) - % t = -1; - % elseif ( abs (v1) >= abs (v2) ) - % t = t1 + delta*df1/v1 ; - % else - % t = t2 - delta*df2/v2 ; - % end - % - % second way: from Bonnans, Lemarechal - d2 = sqrt(d2)*sign(delta); - v1 = df2 + d2 - d1; - v2 = df2 - df1 + 2*d2; - if ( (v1 == 0) && (v2 == 0) ) - t = -1; - else - t = t2 - delta*(v1/v2); - end - end - end -end diff --git a/private/LineSearch_MoreThuente.m b/private/LineSearch_MoreThuente.m deleted file mode 100755 index 8c15bb5..0000000 --- a/private/LineSearch_MoreThuente.m +++ /dev/null @@ -1,485 +0,0 @@ -function [cachet, t, cnt, info ] = LineSearch_MoreThuente(prob, gam, cache, df0, lsopt) -% Function MoreThuenteLS -% -% The purpose of MoreThuenteLS is to find a step which satisfies -% a sufficient decrease condition and a curvature condition. -% -% At each stage MoreThuenteLS updates an interval of -% uncertainty with endpoints stx and sty. 
The interval of -% uncertainty is initially chosen so that it contains a -% minimizer of the modified function -% -% f(x+stp*s) - f(x) - delta*stp*(gradf(x)'s). -% -% If a step is obtained for which the modified function -% has a nonpositive function value and nonnegative derivative, -% then the interval of uncertainty is chosen so that it -% contains a minimizer of f(x+stp*s). -% -% The algorithm is designed to find a step which satisfies -% the sufficient decrease condition -% -% f(x+stp*s) <= f(x) + delta*stp*(gradf(x)'s), -% -% and the curvature condition -% -% abs(gradf(x+stp*s)'s)) <= sigma*abs(gradf(x)'s). -% -% If delta is less than sigma and if, for example, the function -% is bounded below, then there is always a step which satisfies -% both conditions. If no step can be found which satisfies both -% conditions, then the algorithm usually stops when rounding -% errors prevent further progress. In this case stp only -% satisfies the sufficient decrease condition. -% -% -% stp is a nonnegative variable. On input stp contains an -% initial estimate of a satisfactory step. On output -% stp contains the final estimate. -% -% delta and sigma are nonnegative input variables. Termination -% occurs when the sufficient decrease condition and the -% directional derivative condition are satisfied. -% -% progTol is a nonnegative input variable. Termination occurs -% when the relative width of the interval of uncertainty -% is at most progTol. -% -% tmin and tmax are nonnegative input variables which -% specify lower and upper bounds for the step. -% -% maxfev is a positive integer input variable. Termination -% occurs when the number of calls to fcn is at least -% maxfev by the end of an iteration. -% -% info is an integer output variable set as follows: -% -% info = 0 The sufficient decrease condition and the -% directional derivative condition hold. -% -% info = -2 Relative width of the interval of uncertainty -% is at most progTol. 
-% -% info = -1 Number of calls to fcn has reached maxfev. -% -% info = 4 The step is at the lower bound tmin. -% -% info = 5 The step is at the upper bound tmax. -% -% info = 6 Rounding errors prevent further progress. -% There may not be a step which satisfies the -% sufficient decrease and curvature conditions. -% Tolerances may be too small. -% -% nf is an integer output variable set to the number of -% calls to fcn. -% -% wa is a work array of length n. -% -% Subprograms called -% -% user-supplied......fcn -% -% MINPACK-supplied...cstep -% -% FORTRAN-supplied...abs,max,min -% -% Argonne National Laboratory. MINPACK Project. June 1983 -% Jorge J. More', David J. Thuente -% -% ********** - % Q, A, C, f2, proxg - cnt = [0, 0, 0, 0, 0]; - nf = 0; - p5 = .5; - p66 = .66; - xtrapf = 4; - info = 0; - infoc = 1; - - % Compute the initial gradient in the search direction - % and check that s is a descent direction. - % - - % - % Initialize local variables. - % - t = lsopt.tau0; - f = cache.FBE; - brackt = 0; - stage1 = 1; - finit = f; - dgtest = lsopt.delta*df0; - width = lsopt.tmax - lsopt.tmin; - width1 = 2*width; - % - % The variables stx, fx, dgx contain the values of the step, - % function, and directional derivative at the best step. - % The variables sty, fy, dgy contain the value of the step, - % function, and derivative at the other endpoint of - % the interval of uncertainty. - % The variables stp, f, dg contain the values of the step, - % function, and derivative at the current step. - % - stx = 0; - fx = finit; - dgx = df0; - sty = 0; - fy = finit; - dgy = df0; - % - % Start of iteration. - % - while (1) - % - % Set the minimum and maximum steps to correspond - % to the present interval of uncertainty. - % - if (brackt) - stmin = min(stx,sty); - stmax = max(stx,sty); - else - stmin = stx; - stmax = t + xtrapf*(t - stx); - end - % - % Force the step to be within the bounds tmax and tmin. 
- % - t = max(t,lsopt.tmin); - t = min(t,lsopt.tmax); - % - % If an unusual termination is to occur then let - % t be the lowest point obtained so far. - % - if ((brackt && (t <= stmin || t >= stmax)) || nf >= lsopt.maxfev-1 || infoc == 0 || (brackt && stmax-stmin <= lsopt.progTol*stmax)) - t = stx; - end - % - % Evaluate the function and gradient at t - % and compute the directional derivative. - % - [cachet, cnt1] = LineFBE(prob, gam, t, cache, 3); - cnt = cnt+cnt1; - nf = nf+1; - f = cachet.FBE; dg = cachet.dFBE; - - ftest1 = finit + t*dgtest; - % - % Test for convergence. - % - if ((brackt && (t <= stmin || t >= stmax)) || infoc == 0) - % Rounding errors prevent further progress - info = 6; - return - end - if (t == lsopt.tmax && f <= ftest1 && dg <= dgtest) - % The step is at the upper bound tmax - info = 5; - return - end - if (t == lsopt.tmin && (f > ftest1 || dg >= dgtest)) - % The step is at the lower bound tmin - info = 4; - return - end - if (nf >= lsopt.maxfev) - info = -1; - return - end - if (brackt && stmax-stmin <= lsopt.progTol*stmax) - info = -2; - return - end - if (f <= ftest1 && abs(dg) <= lsopt.sigma*(-df0)) - info = 0; - return - end - % - % In the first stage we seek a step for which the modified - % function has a nonpositive value and nonnegative derivative. - % - if (stage1 && f <= ftest1 && dg >= min(lsopt.delta,lsopt.sigma)*df0) - stage1 = 0; - end - % - % A modified function is used to predict the step only if - % we have not obtained a step for which the modified - % function has a nonpositive function value and nonnegative - % derivative, and if a lower function value has been - % obtained but the decrease is not sufficient. - % - if (stage1 && f <= fx && f > ftest1) - % - % Define the modified function and derivative values. 
- % - fm = f - t*dgtest; - fxm = fx - stx*dgtest; - fym = fy - sty*dgtest; - dgm = dg - dgtest; - dgxm = dgx - dgtest; - dgym = dgy - dgtest; - % - % Call cstep to update the interval of uncertainty - % and to compute the new step. - % - [stx,fxm,dgxm,sty,fym,dgym,t,fm,dgm,brackt,infoc] = MoreThuenteCstep(stx,fxm,dgxm,sty,fym,dgym,t,fm,dgm,brackt,stmin,stmax); - % - % Reset the function and gradient values for f. - % - fx = fxm + stx*dgtest; - fy = fym + sty*dgtest; - dgx = dgxm + dgtest; - dgy = dgym + dgtest; - else - % - % Call cstep to update the interval of uncertainty - % and to compute the new step. - % - [stx,fx,dgx,sty,fy,dgy,t,f,dg,brackt,infoc] = MoreThuenteCstep(stx,fx,dgx,sty,fy,dgy,t,f,dg,brackt,stmin,stmax); - end - % - % Force a sufficient decrease in the size of the - % interval of uncertainty. - % - if (brackt) - if (abs(sty-stx) >= p66*width1) - t = stx + p5*(sty - stx); - end - width1 = width; - width = abs(sty-stx); - end - % - % End of iteration. - % - end -end - -function [stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,info] = MoreThuenteCstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,stpmin,stpmax) -% Subroutine cstep -% -% The purpose of cstep is to compute a safeguarded step for -% a linesearch and to update an interval of uncertainty for -% a minimizer of the function. -% -% The parameter stx contains the step with the least function -% value. The parameter stp contains the current step. It is -% assumed that the derivative at stx is negative in the -% direction of the step. If brackt is set true then a -% minimizer has been bracketed in an interval of uncertainty -% with endpoints stx and sty. -% -% The subroutine statement is -% -% subroutine cstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt, -% stpmin,stpmax,info) -% -% where -% -% stx, fx, and dx are variables which specify the step, -% the function, and the derivative at the best step obtained -% so far. 
The derivative must be negative in the direction -% of the step, that is, dx and stp-stx must have opposite -% signs. On output these parameters are updated appropriately. -% -% sty, fy, and dy are variables which specify the step, -% the function, and the derivative at the other endpoint of -% the interval of uncertainty. On output these parameters are -% updated appropriately. -% -% stp, fp, and dp are variables which specify the step, -% the function, and the derivative at the current step. -% If brackt is set true then on input stp must be -% between stx and sty. On output stp is set to the new step. -% -% brackt is a logical variable which specifies if a minimizer -% has been bracketed. If the minimizer has not been bracketed -% then on input brackt must be set false. If the minimizer -% is bracketed then on output brackt is set true. -% -% stpmin and stpmax are input variables which specify lower -% and upper bounds for the step. -% -% info is an integer output variable set as follows: -% If info = 1,2,3,4,5, then the step has been computed -% according to one of the five cases below. Otherwise -% info = 0, and this indicates improper input parameters. -% -% Subprograms called -% -% FORTRAN-supplied ... abs,max,min,sqrt -% ... dble -% -% Argonne National Laboratory. MINPACK Project. June 1983 -% Jorge J. More', David J. Thuente -% -% ********** - p66 = 0.66; - info = 0; - % - % Check the input parameters for errors. - % - if ((brackt && (stp <= min(stx,sty) || stp >= max(stx,sty))) || dx*(stp-stx) >= 0.0 || stpmax < stpmin) - return - end - % - % Determine if the derivatives have opposite sign. - % - sgnd = dp*(dx/abs(dx)); - % - % First case. A higher function value. - % The minimum is bracketed. If the cubic step is closer - % to stx than the quadratic step, the cubic step is taken, - % else the average of the cubic and quadratic steps is taken. 
- % - if (fp > fx) - info = 1; - bound = 1; - theta = 3*(fx - fp)/(stp - stx) + dx + dp; - s = norm([theta,dx,dp],inf); - gamma = s*sqrt((theta/s)^2 - (dx/s)*(dp/s)); - if (stp < stx) - gamma = -gamma; - end - p = (gamma - dx) + theta; - q = ((gamma - dx) + gamma) + dp; - r = p/q; - stpc = stx + r*(stp - stx); - stpq = stx + ((dx/((fx-fp)/(stp-stx)+dx))/2)*(stp - stx); - if (abs(stpc-stx) < abs(stpq-stx)) - stpf = stpc; - else - stpf = stpc + (stpq - stpc)/2; - end - brackt = 1; - % - % Second case. A lower function value and derivatives of - % opposite sign. The minimum is bracketed. If the cubic - % step is closer to stx than the quadratic (secant) step, - % the cubic step is taken, else the quadratic step is taken. - % - elseif (sgnd < 0.0) - info = 2; - bound = 0; - theta = 3*(fx - fp)/(stp - stx) + dx + dp; - s = norm([theta,dx,dp],inf); - gamma = s*sqrt((theta/s)^2 - (dx/s)*(dp/s)); - if (stp > stx) - gamma = -gamma; - end - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dx; - r = p/q; - stpc = stp + r*(stx - stp); - stpq = stp + (dp/(dp-dx))*(stx - stp); - if (abs(stpc-stp) > abs(stpq-stp)) - stpf = stpc; - else - stpf = stpq; - end - brackt = 1; - % - % Third case. A lower function value, derivatives of the - % same sign, and the magnitude of the derivative decreases. - % The cubic step is only used if the cubic tends to infinity - % in the direction of the step or if the minimum of the cubic - % is beyond stp. Otherwise the cubic step is defined to be - % either stpmin or stpmax. The quadratic (secant) step is also - % computed and if the minimum is bracketed then the the step - % closest to stx is taken, else the step farthest away is taken. - % - elseif (abs(dp) < abs(dx)) - info = 3; - bound = 1; - theta = 3*(fx - fp)/(stp - stx) + dx + dp; - s = norm([theta,dx,dp],inf); - % - % The case gamma = 0 only arises if the cubic does not tend - % to infinity in the direction of the step. 
- % - gamma = s*sqrt(max(0.,(theta/s)^2 - (dx/s)*(dp/s))); - if (stp > stx) - gamma = -gamma; - end - p = (gamma - dp) + theta; - q = (gamma + (dx - dp)) + gamma; - r = p/q; - if (r < 0.0 && gamma ~= 0.0) - stpc = stp + r*(stx - stp); - elseif (stp > stx) - stpc = stpmax; - else - stpc = stpmin; - end - stpq = stp + (dp/(dp-dx))*(stx - stp); - if (brackt) - if (abs(stp-stpc) < abs(stp-stpq)) - stpf = stpc; - else - stpf = stpq; - end - else - if (abs(stp-stpc) > abs(stp-stpq)) - stpf = stpc; - else - stpf = stpq; - end - end - % - % Fourth case. A lower function value, derivatives of the - % same sign, and the magnitude of the derivative does - % not decrease. If the minimum is not bracketed, the step - % is either stpmin or stpmax, else the cubic step is taken. - % - else - info = 4; - bound = 0; - if (brackt) - theta = 3*(fp - fy)/(sty - stp) + dy + dp; - s = norm([theta,dy,dp],inf); - gamma = s*sqrt((theta/s)^2 - (dy/s)*(dp/s)); - if (stp > sty) - gamma = -gamma; - end - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dy; - r = p/q; - stpc = stp + r*(sty - stp); - stpf = stpc; - elseif (stp > stx) - stpf = stpmax; - else - stpf = stpmin; - end - end - % - % Update the interval of uncertainty. This update does not - % depend on the new step or the case analysis above. - % - if (fp > fx) - sty = stp; - fy = fp; - dy = dp; - else - if (sgnd < 0.0) - sty = stx; - fy = fx; - dy = dx; - end - stx = stp; - fx = fp; - dx = dp; - end - % - % Compute the new step and safeguard it. 
- % - stpf = min(stpmax,stpf); - stpf = max(stpmin,stpf); - stp = stpf; - if (brackt && bound) - if (sty > stx) - stp = min(stx+p66*(sty-stx),stp); - else - stp = max(stx+p66*(sty-stx),stp); - end - end -end diff --git a/private/Process_Function.m b/private/Process_Function.m deleted file mode 100644 index 7252f91..0000000 --- a/private/Process_Function.m +++ /dev/null @@ -1,8 +0,0 @@ -function obj = Process_Function(obj) - -if ~isfield(obj, 'isConvex') || isempty(obj.isConvex), obj.isConvex = 0; end -if ~isfield(obj, 'isQuadratic') || isempty(obj.isQuadratic), obj.isQuadratic = 0; end -if ~isfield(obj, 'isConjQuadratic') || isempty(obj.isConjQuadratic), obj.isConjQuadratic = 0; end -if ~isfield(obj, 'hasHessian') || isempty(obj.hasHessian), obj.hasHessian = 0; end - -end diff --git a/private/Process_LineSearchOptions.m b/private/Process_LineSearchOptions.m deleted file mode 100755 index cd1dfe2..0000000 --- a/private/Process_LineSearchOptions.m +++ /dev/null @@ -1,114 +0,0 @@ -function lsopt = Process_LineSearchOptions(opt) - -% % factor in [0, 1] used to compute average cost magnitude C_k as follows: -% % Q_k = 1 + (Delta)Q_k-1, Q_0 = 0, C_k = C_k-1 + (|f_k| - C_k-1)/Q_k -% lsopt.Delta = 0.7;% this goes here to include Hager-Zhang line search as a backup - % Armijo condition parameter delta, range [0, .5] - % phi (a) - phi (0) <= delta phi'(0) - if isfield(opt, 'delta'), lsopt.delta = opt.delta; - else lsopt.delta = 0.1; end - lsopt.beta = opt.beta; - switch opt.linesearch - case 'backtracking' - lsopt.linesearchfun = @LineSearch_Backtracking; - lsopt.progTol = 0; - lsopt.nLS = 50; - case 'backtracking-nm' % nonmonotone - lsopt.linesearchfun = @LineSearch_BacktrackingNM; - lsopt.eta = 0.85; - lsopt.progTol = 0; - lsopt.nLS = 50; - case 'backtracking-armijo' - lsopt.linesearchfun = @LineSearch_BacktrackingArmijo; - lsopt.progTol = 0; - lsopt.nLS = 50; - case 'lemarechal' - lsopt.linesearchfun = @LineSearch_Lemarechal; - lsopt.sigma = 0.9; - % maximum number of 
iterations - lsopt.nbracket = 100; - % type of interpolation - 0 [bisection], 1 [quadratic - % interpolation], 2 [cubic interpolation when possible] - lsopt.interp = 0; - if isfield(opt, 'interp'), lsopt.interp = opt.interp; end - % stop when length of interval is below progTol - lsopt.progTol = 0; - % growth factor in search for initial bracket interval - lsopt.rho = 5; - % parameter for safe-guarding (must be in (0,1/2]) - lsopt.theta = 0.49; - case 'hager-zhang' - lsopt.linesearchfun = @LineSearch_HagerZhang; - lsopt.sigma = 0.9; - % maximum number of times the bracketing interval grows during expansion - lsopt.nexpand = 50; - % maximum number of secant steps - lsopt.nsecant = 50; - % maximum number of times the bracketing interval contracts - lsopt.ncontract = 10; - % factor by which eps grows when line search fails during contraction - lsopt.egrow = 10; - lsopt.QuadOK = true; - % T => when possible, use a cubic step in the line search - lsopt.UseCubic = true; - % true => estimated error in function value is eps*Ck, - % false => estimated error in function value is eps */ - lsopt.PertRule = true; - lsopt.eps = 1e-6; - % |f| < SmallCost*starting cost => skip QuadStep and set PertRule = FALSE*/ - lsopt.SmallCost = 1e-30; - % T => use approximate Wolfe line search - % F => use ordinary Wolfe line search, switch to approximate Wolfe when - % |f_k+1-f_k| < omega*C_k, C_k = average size of cost */ - lsopt.AWolfe = false; - lsopt.omega = 1e-3; - % factor by which secant step is amplified during expansion phase where minimizer is bracketed - lsopt.SecantAmp = 1.05; - % factor by which rho grows during expansion phase where minimizer is bracketed - lsopt.RhoGrow = 2.0; - % maximum number of times that eps is updated - lsopt.neps = 5; - % maximum factor secant step increases stepsize in expansion phase - lsopt.ExpandSafe = 200; - % value of the parameter theta in the cg_descent update formula: - % W. W. Hager and H. 
Zhang, A survey of nonlinear conjugate gradient - % methods, Pacific Journal of Optimization, 2 (2006), pp. 35-58. - lsopt.theta = 0.5; - % growth factor in search for initial bracket interval - lsopt.rho = 5; - % decay factor for bracket interval width in line search, range (0, 1) - lsopt.gamma = 0.66; - case 'more-thuente' - lsopt.linesearchfun = @LineSearch_MoreThuente; - lsopt.sigma = 0.9; - lsopt.progTol = 0; - lsopt.tmin = 0; - lsopt.tmax = 1e15; - lsopt.maxfev = 100; - case 'fletcher' - lsopt.linesearchfun = @LineSearch_Fletcher; - lsopt.sigma = 0.9; - % maximum number of times the bracketing interval grows during expansion - lsopt.nbracket = 50; - % maximum number of section steps - lsopt.nsection = 50; - % stop when progress is below progTol - lsopt.progTol = 0; - % estimate of minimum value of the function - lsopt.fmin = -inf; - end - - % if method is not L-BFGS then initial stepsize is selected according to Hager-Zhang - lsopt.quadStep = true; - % starting guess for line search = - % psi0 ||x_0||_infty over ||g_0||_infty if x_0 != 0 - % psi0 |f(x_0)|/||g_0||_2 otherwise */ - lsopt.psi0 = 0.01; - % when the function is approximately quadratic, use gradient at - % psi1*psi2*previous step for estimating initial stepsize */ - lsopt.psi1 = 1.0 ; - % when starting a new cg iteration, our initial guess for the line - % search stepsize is psi2*previous step */ - lsopt.psi2 = 2; - -end diff --git a/private/Process_Options.m b/private/Process_Options.m deleted file mode 100755 index 13f1bd9..0000000 --- a/private/Process_Options.m +++ /dev/null @@ -1,75 +0,0 @@ -function opt = Process_Options(opt) - -% General options - -if ~isfield(opt, 'tol') || isempty(opt.tol), opt.tol = 1e-8; end -if ~isfield(opt, 'term') || isempty(opt.term), opt.customTerm = false; -else opt.customTerm = true; end -if ~isfield(opt, 'record') || isempty(opt.record), opt.toRecord = false; -else opt.toRecord = true; end -if ~isfield(opt, 'maxit') || isempty(opt.maxit), opt.maxit = 10000; end 
-if ~isfield(opt, 'beta') || isempty(opt.beta), opt.beta = 0.05; end -if ~isfield(opt, 'variant'), opt.variant = ''; end -if ~isfield(opt, 'display') || isempty(opt.display), opt.display = 2; end -if ~isfield(opt, 'report') || isempty(opt.report), opt.report = 1; end -if ~isfield(opt, 'useHessian') || isempty(opt.useHessian), opt.useHessian = false; end -if ~isfield(opt, 'metric') || isempty(opt.metric), opt.metric = @(x) x; end - -% Methods (directions) options - -if ~isfield(opt, 'modBroyden') || isempty(opt.modBroyden), opt.modBroyden = 3; end -if ~isfield(opt, 'deltaCurvature') || isempty(opt.deltaCurvature), opt.deltaCurvature = 1e-6; end -if ~isfield(opt, 'thetaBar') || isempty(opt.thetaBar), opt.thetaBar = 1e-4; end -if ~isfield(opt, 'initialScaling') || isempty(opt.initialScaling), opt.initialScaling = 0; end -if ~isfield(opt, 'memory') || isempty(opt.memory), opt.memory = 10; end - -opt.optsL.UT = true; -opt.optsL.TRANSA = true; -opt.optsU.UT = true; - -if strcmp(opt.variant, 'fast'), opt.fast = 1; -else opt.fast = 0; end - -% Sets default solver if not specified - -if ~isfield(opt, 'solver') || isempty(opt.solver) - opt.solver = 'zerofpr'; -end -opt.solverfun = str2func(opt.solver); - -% Sets default method if not specified - -if ~isfield(opt, 'method') || isempty(opt.method) - if strcmp(opt.solver, 'fbs') - opt.method = ''; - else - opt.method = 'lbfgs'; - end -end -opt.methodfun = str2func(strcat('Direction_', lower(opt.method))); -if ~isfield(opt, 'memopt'), opt.memopt = 1; end - -% Sets default line-search if not specified - -if strcmp(opt.solver, 'classical') - method2linesearch = containers.Map( ... - {'sd', 'bfgs', 'bfgs_naive', 'lbfgs', 'cg-desc', 'cg-prp', 'cg-dyhs', 'bb', 'broyden', 'lbroyden', 'rbroyden'}, ... 
- {'armijo', 'lemarechal', 'lemarechal', 'lemarechal', 'lemarechal', 'lemarechal', 'lemarechal', 'nonmonotone-armijo', 'lemarechal', 'lemarechal', 'lemarechal'}); -elseif strcmp(opt.solver, 'fbs') - method2linesearch = @(s) ''; -else - method2linesearch = @(s) 'backtracking'; -end -if ~isfield(opt, 'linesearch') || isempty(opt.linesearch) - opt.linesearch = method2linesearch(opt.method); -end - -% Wrap up a string describing the algorithm - -opt.name = opt.solver; -if ~isempty(opt.variant) > 0, opt.name = strcat([opt.name, ', ', opt.variant]); end -if ~isempty(opt.method) > 0, opt.name = strcat([opt.name, ', ', opt.method]); end -if ~isempty(opt.linesearch) > 0, opt.name = strcat([opt.name, ', ', opt.linesearch]); end -opt.processed = true; - -end diff --git a/private/Process_PrimalOutput.m b/private/Process_PrimalOutput.m deleted file mode 100755 index 404bad2..0000000 --- a/private/Process_PrimalOutput.m +++ /dev/null @@ -1,3 +0,0 @@ -function out = Process_PrimalOutput(prob, out_solver) - -end diff --git a/private/Process_Problem.m b/private/Process_Problem.m deleted file mode 100755 index d519f8f..0000000 --- a/private/Process_Problem.m +++ /dev/null @@ -1,160 +0,0 @@ -function [prob, id] = Process_Problem(fs, gs, init, aff, constr) - - if ~isempty(aff) && ~isempty(constr) - error('cannot have both constraints and affine mappings'); - end - - M = length(fs); - N = length(gs); - - if ~isa(fs, 'cell'), fs = {fs}; end - if ~isa(gs, 'cell'), gs = {gs}; end - - for i = 1:M, fs{i} = Process_Function(fs{i}); end - for i = 1:N, gs{i} = Process_Function(gs{i}); end - - if ~isempty(aff) - if isa(aff, 'double') || isa(aff, 'struct') - aff = {aff}; - end - if ~isa(aff, 'cell') - error('the list of affine maps must be a cell array or a matrix'); - end - if size(aff,2) ~= N+1 - error('affine term doesn''t match the number of g''s'); - end - if size(aff,1) ~= M - error('affine term doesn''t match the number of f''s'); - end - end - - if isempty(constr) - id = 1; - [f1, C1, 
d1, f2, C2, d2, g] = combineTermsComposite(fs, gs, aff); - prob = ProblemComposite(f1, C1, d1, f2, C2, d2, g, [], [], init); - else - id = 2; - if ~isa(constr, 'cell') - error('the constraint must be a cell array'); - end - if size(constr, 1) ~= N - error('constraint doesn''t match the number of gs'); - end - if size(constr, 2) ~= M+2 - error('constraint doesn''t match the number of fs'); - end - [f1, A1, f2, A2, g, B, b] = combineTermsSeparable(fs, gs, constr); - prob = ProblemComposite(conjugate(f1), -A1', [], conjugate(f2), -A2', [], conjugate(g), -B', b, init); - end - -end - -function [idx_quad, idx_nonquad] = splitSmooth(fs, conj) - - idx_quad = []; - idx_nonquad = []; - - for i = 1:length(fs) - if ~conj && isfield(fs{i}, 'isQuadratic') && fs{i}.isQuadratic - idx_quad(end+1) = i; - elseif conj && isfield(fs{i}, 'isConjQuadratic') && fs{i}.isConjQuadratic - idx_quad(end+1) = i; - else - idx_nonquad(end+1) = i; - end - end - -end - -function [f1, C1, d1, f2, C2, d2, g] = combineTermsComposite(fs, gs, aff) - - f1 = []; C1 = []; d1 = []; - f2 = []; C2 = []; d2 = []; - g = []; - - M = length(fs); - N = length(gs); - - if N > 1 - dims = {}; - for j=1:N, dims{j} = size(aff{1,j},2); end - g = separableSum(gs, dims); - else - g = gs{1}; - end - - aff1 = {}; - if ~isempty(aff) - for i=1:M - aff1{i,1} = horzcat(aff{i,1:N}); - if length(aff(i,:)) == N, aff1{i,2} = 0; - else aff1{i,2} = aff{i,N+1}; end - end - end - - [idx_quad, idx_nonquad] = splitSmooth(fs, 0); - if ~isempty(idx_quad) - if length(idx_quad) > 1 - dims = {}; - for i=1:length(idx_quad), dims{i} = size(aff1{idx_quad(i),1},1); end - f1 = separableSum(fs(idx_quad), dims); - else - f1 = fs{idx_quad(1)}; - end - if ~isempty(aff1) - C1 = vertcat(aff1{idx_quad,1}); - d1 = vertcat(aff1{idx_quad,2}); - end - end - if ~isempty(idx_nonquad) - if length(idx_nonquad) > 1 - dims = {}; - for i=1:length(idx_nonquad), dims{i} = size(aff1{idx_nonquad(i),1},1); end - f2 = separableSum(fs(idx_nonquad), dims); - else - f2 = 
fs{idx_nonquad(1)}; - end - if ~isempty(aff1) - C2 = vertcat(aff1{idx_nonquad,1}); - d2 = vertcat(aff1{idx_nonquad,2}); - end - end - -end - -function [f1, A1, f2, A2, g, B, b] = combineTermsSeparable(fs, gs, constr) - - f1 = []; A1 = []; - f2 = []; A2 = []; - g = []; B = []; - b = []; - - M = length(fs); - N = length(gs); - - dims = {}; - for j=1:N, dims{j} = size(constr{j,M+1},2); end - g = separableSum(gs, dims); - B = blkdiag(constr{:,M+1}); - - constr1 = {}; - for i=1:N - constr1{i} = vertcat(constr{1:N,i}); - end - - [idx_quad, idx_nonquad] = splitSmooth(fs, 1); - if ~isempty(idx_quad) - dims = {}; - for i=1:length(idx_quad), dims{i} = size(constr1{idx_quad(i)},2); end - f1 = separableSum(fs(idx_quad), dims); - A1 = horzcat(constr1{idx_quad}); - end - if ~isempty(idx_nonquad) - dims = {}; - for i=1:length(idx_nonquad), dims{i} = size(constr1{idx_nonquad(i)},2); end - f2 = separableSum(fs(idx_nonquad), dims); - A2 = horzcat(constr1{idx_nonquad}); - end - b = vertcat(constr{:,end}); - -end diff --git a/private/Util_PrintProgress.m b/private/Util_PrintProgress.m deleted file mode 100755 index 1140935..0000000 --- a/private/Util_PrintProgress.m +++ /dev/null @@ -1,22 +0,0 @@ -function Util_PrintProgress(varargin) - -if length(varargin) == 1 - it = varargin{1}; - if mod(it, 100) == 0 - fprintf('.'); - end - if mod(it, 4000) == 0 - fprintf('\n'); - end -end - -if length(varargin) == 2 - flag = varargin{2}; - if flag == 0 - fprintf('*\n'); - else - fprintf('!!!\n'); - end -end - -end diff --git a/private/afbs_noncvx.m b/private/afbs_noncvx.m deleted file mode 100644 index c6ed410..0000000 --- a/private/afbs_noncvx.m +++ /dev/null @@ -1,168 +0,0 @@ -function out = afbs_noncvx(prob, opt, varargin) - -MAXIMUM_Lf = 1e14; - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -msgTerm = ''; -record = 
[]; - -% display stuff - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.'); -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% set stepsize, initialize vectors - -gam_x = (1-opt.beta)/Lf; -gam_y = (1-opt.beta)/Lf; - -t1 = 1; t0 = 0; -eta = 0.5; % eta = 0.0 for monotone, > 0.0 for nonmonotone -q = 1.0; -del = 1e-2; % the smaller this is, the more extrapolations are accepted -c = opt.F0; - -cache_z1 = FBCache(prob, prob.x0, gam_x, ops); -cache_x1 = FBCache(prob, prob.x0, gam_x, ops); -cache_x0 = FBCache(prob, prob.x0, gam_x, ops); - -tic0 = tic(); - -for it = 1:opt.maxit - - z1 = cache_z1.Get_Point(); - x1 = cache_x1.Get_Point(); - x0 = cache_x0.Get_Point(); - - y1 = x1 + t0/t1*(z1 - x1) + (t0-1)/t1*(x1 - x0); - cache_y1 = FBCache(prob, y1, gam_y, ops); - - if adaptive - [hasGammaChanged_y, ~] = cache_y1.Backtrack_Gamma(opt.beta); - gam_y = cache_y1.Get_Gamma(); - end - - z1 = cache_y1.Get_ProxGradStep(); - cache_z1 = FBCache(prob, z1, gam_x, ops); - - % record values, stopping criterion - - if it == 1 - cache_0 = cache_y1; - end - - if opt.toRecord - record = [record, opt.record(prob, it, cache_0, cache_y1)]; - end - - if opt.report - objective(1, it) = cache_y1.Get_FBE(); - residual(1, it) = norm(cache_y1.Get_FPR(), 'inf')/cache_y1.Get_Gamma(); - ts(1, it) = toc(tic0); - end - - if cache_y1.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - - %%% - - F_z1 = cache_y1.Get_g() + cache_z1.Get_f(); - - cache_x0 = cache_x1; - - if F_z1 <= c - del*cache_y1.Get_NormFPR()^2 - % extrapolation is accepted - cache_x1 = cache_z1; - F_x1 = F_z1; - else - if adaptive - [hasGammaChanged_x, ~] = cache_x1.Backtrack_Gamma(opt.beta); - gam_x = cache_x1.Get_Gamma(); - if hasGammaChanged_x - cache_z1.Set_Gamma(gam_x); - end - end - v1 = cache_x1.Get_ProxGradStep(); - cache_v1 = FBCache(prob, v1, gam_x, ops); 
- F_v1 = cache_v1.Get_f() + cache_x1.Get_g(); - if F_z1 <= F_v1 - % extrapolation is accepted - cache_x1 = cache_z1; - F_x1 = F_z1; - else - % extrapolation is rejected (ordinary FB step) - cache_x1 = cache_v1; - F_x1 = F_v1; - end - end - - t0 = t1; - t1 = (sqrt(4*(t1^2) + 1) + 1)/2; - q1 = eta*q + 1; - c = (eta*q*c + F_x1)/q1; - q = q1; - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,100) == 0) || opt.display >= 3 - res_curr = norm(cache_y1.Get_FPR(), 'inf')/cache_y1.Get_Gamma(); - obj_curr = cache_y1.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, cache_y1.Get_Gamma(), res_curr, obj_curr); - end - -end - -time = toc(tic0); - -if it == opt.maxit - msgTerm = [msgTerm, 'exceeded maximum iterations']; - flagTerm = 1; -end - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_y1.Get_FPR(), 'inf')/cache_y1.Get_Gamma(); - obj_curr = cache_y1.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, cache_y1.Get_Gamma(), res_curr, obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = cache_y1.Get_ProxGradStep(); -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -out.record = record; -out.gam = cache_y1.Get_Gamma(); -out.adaptive = adaptive; -out.time = time; diff --git a/private/classical.m b/private/classical.m deleted file mode 100644 index bd2c27a..0000000 --- a/private/classical.m +++ /dev/null @@ -1,159 +0,0 @@ -function out = classical(prob, opt, lsopt) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma - 
-gam = (1-opt.beta)/Lf; - -% display header - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('%6s%11s%11s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', '||dir||', 'slope', 'tau'); -end - -cacheDir.cntSkip = 0; - -t0 = tic(); - -cache_current = FBCache(prob, prob.x0, gam, ops); -restart = 0; - -t0 = tic(); - -for it = 1:opt.maxit - - % trace stuff - - if it == 1 - cache_0 = cache_current; - end - - if opt.report - residual(1, it) = norm(cache_current.Get_FPR(), 'inf')/cache_current.Get_Gamma(); - objective(1, it) = cache_current.Get_FBE(); - ts(1, it) = toc(t0); - end - - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_current); - end - - solution = cache_current.z; - - % check for termination - - if isnan(cache_current.normFPR) - msgTerm = 'something went wrong'; - flagTerm = 1; - break; - end - if ~restart - if ~opt.customTerm - if cache_current.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_current); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % compute pair (s, y) for quasi-Newton updates - - if it > 1 - sk = cache_current.Get_Point() - cache_previous.Get_Point(); - yk = cache_current.Get_GradFBE() - cache_previous.Get_GradFBE(); - else - sk = []; - yk = []; - end - - % compute search direction and slope - - [dir, tau0, cacheDir] = ... - opt.methodfun(prob, opt, it, restart, sk, yk, cache_current.Get_GradFBE(), cacheDir); - slope = cache_current.Get_GradFBE()'*dir; - - % perform line search - - [tau, cache_tau, ~, lsopt, flagLS] = ... 
- lsopt.linesearchfun(cache_current, dir, slope, tau0, lsopt, adaptive, it, restart); - - % prepare next iteration, store current solution - - restart = 0; - if flagLS == -1 % gam was too large - cache_previous = cache_current; - gam = gam/2; - restart = 1; - solution = cache_current.Get_ProxGradStep(); - elseif flagLS > 0 % line-search failed - flagTerm = 2; - msgTerm = strcat(['line search failed at iteration ', num2str(it)]); - break; - else - cache_previous = cache_current; - cache_current = cache_tau; - solution = cache_tau.z; - end - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif opt.display >= 2 && mod(it,10) == 0 - fprintf('%6d %7.4e %7.4e %7.4e %7.4e %7.4e %7.4e %d\n', it, gam, residual(1,it), objective(1,it), norm(dir), slope, tau, flagLS); - end - -end - -time = toc(t0); - -if it == opt.maxit - flagTerm = 1; - msgTerm = 'exceeded maximum iterations'; -end - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = solution; -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -out.record = record; -out.gam = gam; -out.adaptive = adaptive; -out.time = time; diff --git a/private/fbs.m b/private/fbs.m deleted file mode 100755 index bd5c707..0000000 --- a/private/fbs.m +++ /dev/null @@ -1,141 +0,0 @@ -function out = fbs(prob, opt, varargin) - -MAXIMUM_Lf = 1e14; - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); -end - -% initialize operations counter -ops = FBOperations(); - -msgTerm = ''; -record = []; - -% display stuff - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.'); -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = 
prob.Get_Lipschitz(opt); - -% set stepsize, initialize vectors - -gam = 1/Lf; -xk = prob.x0; -vk = prob.x0; - -t0 = tic(); - -for it = 1:opt.maxit - - if opt.fast - theta = 2/(it+1); % since it starts from 1 - yk = (1-theta)*xk+theta*vk; - else - yk = xk; - end - - cache_yk = FBCache(prob, yk, gam, ops); - - if it == 1 - cache_0 = cache_yk; - end - - hasGammaChanged = false; - - if adaptive - [hasGammaChanged, ~] = cache_yk.Backtrack_Gamma(0.0); - gam = cache_yk.Get_Gamma(); - end - - if opt.report - objective(1, it) = cache_yk.Get_FBE(); - residual(1, it) = norm(cache_yk.Get_FPR(), 'inf')/cache_yk.Get_Gamma(); - ts(1, it) = toc(t0); - end - - if opt.toRecord - record = [record, opt.record(prob, it, cache_0, cache_yk)]; - end - - if ~hasGammaChanged - if ~opt.customTerm - if cache_yk.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_yk); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - if prob.Lf >= MAXIMUM_Lf - msgTerm = 'L is too large'; - flagTerm = 2; - break; - end - - if opt.fast - vk = xk + (cache_yk.Get_ProxGradStep()-xk)/theta; - end - - xk = cache_yk.Get_ProxGradStep(); - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,100) == 0) || opt.display >= 3 - res_curr = norm(cache_yk.Get_FPR(), 'inf')/cache_yk.Get_Gamma(); - obj_curr = cache_yk.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr); - end - -end - -time = toc(t0); - -if it == opt.maxit - msgTerm = [msgTerm, 'exceeded maximum iterations']; - flagTerm = 1; -end - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_yk.Get_FPR(), 'inf')/cache_yk.Get_Gamma(); - obj_curr = cache_yk.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr); -end - -% 
pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = cache_yk.Get_ProxGradStep(); -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -out.record = record; -out.gam = gam; -out.adaptive = adaptive; -out.time = time; diff --git a/private/ifbs_noncvx.m b/private/ifbs_noncvx.m deleted file mode 100644 index 3252cda..0000000 --- a/private/ifbs_noncvx.m +++ /dev/null @@ -1,130 +0,0 @@ -function out = ifbs_noncvx(prob, opt, varargin) - -MAXIMUM_Lf = 1e14; - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -msgTerm = ''; -record = []; - -% display stuff - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.'); -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -if adaptive - warning('Lipschitz constant is not accurate') -end - -% set stepsize, initialize vectors - -gam = (0.99999-opt.beta)/Lf; - -cache_x0 = FBCache(prob, prob.x0, gam, ops); -cache_x1 = FBCache(prob, prob.x0, gam, ops); -cache_0 = cache_x0; - -tic0 = tic(); - -g_x1 = inf; - -for it = 1:opt.maxit - - x0 = cache_x0.Get_Point(); - x1 = cache_x1.Get_Point(); - - y1 = cache_x1.Get_GradStep(); - f_x1 = cache_x1.Get_f(); - w1 = y1 + (opt.beta/2)*(x1-x0); - - cache_w1 = FBCache(prob, w1, gam, ops); - - [x1, g_x1] = cache_w1.Get_ProxStep(w1); - - cache_x0 = cache_x1; - cache_x1 = FBCache(prob, x1, gam, ops); - - % record values, stopping criterion - - if opt.toRecord - record = [record, opt.record(prob, it, cache_0, cache_x1)]; - end - - res_curr = norm(x0-x1, 'inf')/cache_x1.Get_Gamma(); - - if opt.report - if it == 1 - objective(1, it) = inf; - else - objective(1, it) = f_x1 + g_x1; 
- end - residual(1, it) = res_curr; - ts(1, it) = toc(tic0); - end - - if res_curr <= opt.tol - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - - %%% - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,100) == 0) || opt.display >= 3 - obj_curr = f_x1 + g_x1; - fprintf('%6d %7.4e %7.4e %7.4e\n', it, cache_x1.Get_Gamma(), res_curr, obj_curr); - end - -end - -time = toc(tic0); - -if it == opt.maxit - msgTerm = [msgTerm, 'exceeded maximum iterations']; - flagTerm = 1; -end - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - obj_curr = f_x1 + g_x1; - fprintf('%6d %7.4e %7.4e %7.4e\n', it, cache_x1.Get_Gamma(), res_curr, obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = x1; -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -out.record = record; -out.gam = cache_x1.Get_Gamma(); -out.adaptive = adaptive; -out.time = time; diff --git a/private/lbfgs.c b/private/lbfgs.c deleted file mode 100755 index 2794d5e..0000000 --- a/private/lbfgs.c +++ /dev/null @@ -1,142 +0,0 @@ -/* -% Copyright (C) 2015-2016, Lorenzo Stella and Panagiotis Patrinos -% -% This file is part of ForBES. -% -% ForBES is free software: you can redistribute it and/or modify -% it under the terms of the GNU Lesser General Public License as published by -% the Free Software Foundation, either version 3 of the License, or -% (at your option) any later version. -% -% ForBES is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU Lesser General Public License for more details. -% -% You should have received a copy of the GNU Lesser General Public License -% along with ForBES. 
If not, see . -*/ - -#include "mex.h" - -#define IS_REAL_SPARSE_MAT(P) (mxGetNumberOfDimensions(P) == 2 && \ - mxIsSparse(P) && mxIsDouble(P)) -#define IS_REAL_DENSE_MAT(P) (mxGetNumberOfDimensions(P) == 2 && \ - !mxIsSparse(P) && mxIsDouble(P)) -#define IS_REAL_DENSE_VEC(P) ((mxGetNumberOfDimensions(P) == 1 || \ - (mxGetNumberOfDimensions(P) == 2 && (mxGetN(P) == 1 || mxGetM(P) == 1))) && \ - !mxIsSparse(P) && mxIsDouble(P)) -#define IS_INT32_DENSE_VEC(P) ((mxGetNumberOfDimensions(P) == 1 || \ - (mxGetNumberOfDimensions(P) == 2 && (mxGetN(P) == 1 || mxGetM(P) == 1))) && \ - !mxIsSparse(P) && mxIsInt32(P)) -#define IS_REAL_SCALAR(P) (IS_REAL_DENSE_VEC(P) && mxGetNumberOfElements(P) == 1) -#define IS_INT32_SCALAR(P) (IS_INT32_DENSE_VEC(P) && mxGetNumberOfElements(P) == 1) - -void LBFGS_MATVEC_TWOLOOP(int n, int mem, double * dir_n, double * s_n_m, double * y_n_m, - double * ys_m, double H, double * g_n, int curridx, int currmem, double * alpha_m) -{ - double beta; - int i, j, k; - - for (j=0; j=mem) i = 0; - for (k=0; k=mem) i = 0; - } -} - -void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) -{ - int n, mem, curridx, currmem, dir_dims[2]; - double * dir, * s, * y, * ys, H, * g, * alpha; - - if (nrhs != 7) { - mexErrMsgTxt("LBFGS: you should provide exactly 7 arguments."); - return; - } - if (nlhs > 1) { - mexErrMsgTxt("LBFGS: too many output arguments."); - return; - } - if (!IS_REAL_DENSE_MAT(prhs[0])) { - mexErrMsgTxt("LBFGS: 1st argument must be a double, dense matrix."); - return; - } - if (!IS_REAL_DENSE_MAT(prhs[1])) { - mexErrMsgTxt("LBFGS: 2nd argument must be a double, dense matrix."); - return; - } - if (!IS_REAL_DENSE_VEC(prhs[2])) { - mexErrMsgTxt("LBFGS: 3rd argument must be a double, dense vector."); - return; - } - if (!IS_REAL_SCALAR(prhs[3])) { - mexErrMsgTxt("LBFGS: 4th argument must be a double scalar."); - return; - } - if (!IS_REAL_DENSE_VEC(prhs[4])) { - mexErrMsgTxt("LBFGS: 5th argument must be a double, dense 
vector."); - return; - } - if (!IS_INT32_SCALAR(prhs[5])) { - mexErrMsgTxt("LBFGS: 6th argument must be a 32-bit integer."); - return; - } - if (!IS_INT32_SCALAR(prhs[6])) { - mexErrMsgTxt("LBFGS: 7th argument must be a 32-bit integer."); - return; - } - - s = mxGetPr(prhs[0]); - y = mxGetPr(prhs[1]); - ys = mxGetPr(prhs[2]); - H = mxGetScalar(prhs[3]); - g = mxGetPr(prhs[4]); - curridx = (int)mxGetScalar(prhs[5])-1; - currmem = (int)mxGetScalar(prhs[6]); - - n = mxGetDimensions(prhs[0])[0]; - mem = mxGetDimensions(prhs[0])[1]; - dir_dims[0] = n; - dir_dims[1] = 1; - - alpha = mxCalloc(mem, sizeof(double)); - - dir_dims[0] = n; - dir_dims[1] = 1; - plhs[0] = mxCreateNumericArray(2, dir_dims, mxDOUBLE_CLASS, mxREAL); - dir = mxGetPr(plhs[0]); - - LBFGS_MATVEC_TWOLOOP(n, mem, dir, s, y, ys, H, g, curridx, currmem, alpha); - - mxFree(alpha); -} diff --git a/private/minfbe.m b/private/minfbe.m deleted file mode 100755 index 761491b..0000000 --- a/private/minfbe.m +++ /dev/null @@ -1,188 +0,0 @@ -function out = minfbe(prob, opt, lsopt) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma - -gam = (1-opt.beta)/Lf; - -% display header - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', '||dir||', 'slope', 'tau'); -end - -cache_dir.cntSkip = 0; -restart = 0; - -cache_current = FBCache(prob, prob.x0, gam, ops); - -t0 = tic(); - -for it = 1:opt.maxit - - % store initial cache - - if it == 1 - cache_0 = cache_current; - end - - % trace stuff - - solution = cache_current.Get_ProxGradStep(); - - if opt.report - objective(1, it) = cache_current.Get_FBE(); - ts(1, it) = toc(t0); - residual(1, it) = 
norm(cache_current.Get_FPR(), 'inf')/cache_current.Get_Gamma(); - end - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_current); - end - - % check for termination - - if ~restart - if ~opt.customTerm - if cache_current.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_current); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % store pair (s, y) to compute direction - - if it > 1 - if opt.memopt == 1 - sk = cache_current.Get_Point() - cache_previous.Get_Point(); - yk = cache_current.Get_GradFBE() - cache_previous.Get_GradFBE(); - elseif opt.memopt == 2 - sk = cache_tau.Get_Point() - cache_previous.Get_Point(); - yk = cache_tau.Get_GradFBE() - cache_previous.Get_GradFBE(); - end - else - sk = []; - yk = []; - end - - % compute search direction and slope - - [dir, tau0, cache_dir] = ... - opt.methodfun(prob, opt, it, restart, sk, yk, cache_current.Get_GradFBE(), cache_dir); - slope = vec(cache_current.Get_GradFBE())'*dir(:); - - % perform line search - - [tau, cache_tau, cache_tau1, lsopt, flagLS] = ... 
- lsopt.linesearchfun(cache_current, dir, slope, tau0, lsopt, adaptive, it, restart); - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%% DEBUG CODE %%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -% if isfield(opt, 'JR') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinJR'), out.superlinJR = []; end -% out.superlinJR(end+1) = norm(cache_current.Get_GradFBE() + opt.JR*dir)/norm(dir); -% end -% -% if isfield(opt, 'H') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinH'), out.superlinH = []; end -% out.superlinH(end+1) = norm(cache_current.Get_GradFBE() + opt.H*dir)/norm(dir); -% end -% -% if isfield(opt, 'M') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinM'), out.superlinM = []; end -% out.superlinM(end+1) = norm(opt.M*dir)/norm(dir); -% end - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - % prepare next iteration, store current solution - - restart = 0; - cache_previous = cache_current; - solution = cache_current.Get_ProxGradStep(); - if flagLS == -1 % gam was too large - gam = cache_tau.Get_Gamma(); - cache_current.Set_Gamma(gam); - restart = 1; - elseif flagLS > 0 % line-search failed - cache_current = FBCache(prob, cache_current.Get_ProxGradStep(), gam, ops); - else - if ~isempty(cache_tau1) - cache_current = cache_tau1; - % so that something about the new iterate has already been computed - else - cache_current = FBCache(prob, cache_tau.Get_ProxGradStep(), gam, ops); - end - end - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,10) == 0) || opt.display >= 3 - fprintf('%6d %7.4e %7.4e %7.4e %7.4e %7.4e %7.4e %d\n', it, gam, residual(1,it), objective(1,it), norm(dir), slope, tau, flagLS); - end - -end - -time = toc(t0); - -if it == opt.maxit - flagTerm = 1; - msgTerm = 'exceeded maximum iterations'; -end - -if 
opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, residual(1,it), objective(1,it)); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = solution; -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -if opt.toRecord, out.record = record; end -out.gam = gam; -out.time = time; -out.cacheDir = cache_dir; diff --git a/private/nama.m b/private/nama.m deleted file mode 100644 index 0099d64..0000000 --- a/private/nama.m +++ /dev/null @@ -1,192 +0,0 @@ -function out = nama(prob, opt, varargin) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); -end - -% initialize operations counter -ops = FBOperations(); - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma and sigma - -gam = (1-opt.beta)/Lf; - -% display header - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', '||d||', 'tau'); -end - -cacheDir.cntSkip = 0; - -msgTerm = 'exceeded maximum iterations'; -flagTerm = 1; - -restart1 = 0; -restart2 = 0; - -cache_x = FBCache(prob, prob.x0, gam, ops); - -t0 = tic(); - -for it = 1:opt.maxit - - % backtracking on gamma - - if adaptive - [restart1, ~] = cache_x.Backtrack_Gamma(opt.beta); - gam = cache_x.Get_Gamma(); - end - - % trace stuff - - if it == 1 - cache_0 = cache_x; - end - - if opt.report - objective(1, it) = cache_x.Get_FBE(); - residual(1, it) = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - ts(1, it) = toc(t0); - end - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_x); - end - - % check for termination - - if ~(restart1 || restart2) - if ~opt.customTerm - if 
cache_x.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_x); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % compute search direction and slope - - if it == 1 || restart1 || restart2 - sk = []; - yk = []; - end - - [dir_QN, ~, cacheDir] = ... - opt.methodfun(prob, opt, it, restart1 || restart2, sk, yk, cache_x.Get_FPR(), cacheDir); - dir_FB = -cache_x.Get_FPR(); - - % perform line search - - tau = 1.0; % this *must* be 1.0 for this line-search to work - cache_x.Set_Directions(dir_QN); - cache_w = cache_x.Get_CacheLine(tau, 1); - if adaptive - [restart2, cache_wbar] = cache_w.Backtrack_Gamma(opt.beta); - gam = cache_w.Get_Gamma(); - else - cache_wbar = []; - end - if restart2 - cache_x.Set_Gamma(gam); - continue; - end - if cache_w.Get_FBE() > cache_x.Get_FBE() - cache_x.Set_Directions([], dir_FB); - end - while cache_w.Get_FBE() > cache_x.Get_FBE() - if tau <= 1e-3 - % simply do forward-backward step if line-search fails - cache_w = FBCache(prob, cache_x.Get_ProxGradStep(), gam, ops); - % next line is for debugging purposes in case the code reaches this - % cache_xbar = FBCache(prob, cache_x.Get_ProxGradStep(), gam, []); - break; - end - tau = tau/2; - cache_w = cache_x.Get_CacheSegment(tau); - if adaptive - [restart2, cache_wbar] = cache_w.Backtrack_Gamma(opt.beta); - gam = cache_w.Get_Gamma(); - if restart2, break; end - end - end - if restart2 - cache_x.Set_Gamma(gam); - continue; - end - restart2 = 0; - - % store pair (s, y) to compute next direction - - sk = cache_w.Get_Point() - cache_x.Get_Point(); - yk = cache_w.Get_FPR() - cache_x.Get_FPR(); - - % update iterate - - if ~isempty(cache_wbar) - cache_x = cache_wbar; - else - cache_x = FBCache(prob, cache_w.Get_ProxGradStep(), gam, ops); - end - - % display stuff - - if opt.display == 1 - 
Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,10) == 0) || opt.display >= 3 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr, norm(dir_QN), tau); - end - -end - -time = toc(t0); - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -if it == opt.maxit - out.x = cache_x.Get_Point(); -else - out.x = cache_x.Get_ProxGradStep(); -end -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -if opt.toRecord, out.record = record; end -out.gam = gam; -out.time = time; -out.cacheDir = cacheDir; - -end diff --git a/private/sign0.m b/private/sign0.m deleted file mode 100644 index 0e6493e..0000000 --- a/private/sign0.m +++ /dev/null @@ -1,11 +0,0 @@ -% Custom sign function: sign0(x) = sign(x) if x != 0, 1 otherwise - -function x = sign0(x) - -if x == 0 - x = 1; - return -end -x = sign(x); - -end \ No newline at end of file diff --git a/private/zerofpr.m b/private/zerofpr.m deleted file mode 100755 index 29133f0..0000000 --- a/private/zerofpr.m +++ /dev/null @@ -1,183 +0,0 @@ -function out = zerofpr(prob, opt, lsopt) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma and sigma - -gam = (1-opt.beta)/Lf; -sig = opt.beta/(4*gam); - -% display header - -if opt.display >= 2 - 
fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', 'tau'); -end - -cacheDir.cntSkip = 0; - -flagTerm = 0; - -t0 = tic(); - -cache_x = FBCache(prob, prob.x0, gam, ops); -restart = 0; - -for it = 1:opt.maxit - - % backtracking on gamma - - if adaptive - [restart, cache_xbar] = cache_x.Backtrack_Gamma(opt.beta); - gam = cache_x.Get_Gamma(); - sig = opt.beta/(4*gam); - else - x_bar = cache_x.Get_ProxGradStep(); - cache_xbar = FBCache(prob, x_bar, cache_x.Get_Gamma(), ops); - end - - if opt.report - objective(1,it) = cache_x.Get_FBE(); - residual(1, it) = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - ts(1, it) = toc(t0); - end - - if it == 1 - cache_0 = cache_x; - end - - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_x); - end - - % check for termination - - if ~restart - if ~opt.customTerm - if cache_x.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_x); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % compute search direction - - if it == 1 || restart - sk = []; - yk = []; - end - - [dir, tau0, cacheDir] = ... - opt.methodfun(prob, opt, it, restart, sk, yk, cache_xbar.Get_FPR(), cacheDir); - - % perform line search - - ref = cache_x.Get_FBE(); - lin = 0.0; - const = -sig*cache_x.Get_NormFPR()^2; - [tau, cache_tau, ~, lsopt, ~] = ... 
- lsopt.linesearchfun(cache_xbar, dir, 0.0, tau0, lsopt, false, it, restart, ref, lin, const); - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%% DEBUG CODE %%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -% if isfield(opt, 'JR') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinJR'), out.superlinJR = []; end -% out.superlinJR(end+1) = norm(cache_xbar.Get_FPR() + opt.JR*dir)/norm(dir); -% end -% -% if isfield(opt, 'H') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinH'), out.superlinH = []; end -% out.superlinH(end+1) = norm(cache_xbar.Get_FPR() + opt.H*dir)/norm(dir); -% end -% -% if isfield(opt, 'M') -% if ~exist('out'), out = struct(); end -% if ~isfield(out, 'superlinM'), out.superlinM = []; end -% out.superlinM(end+1) = norm(opt.M*dir)/norm(dir); -% end - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - % store pair (s, y) to compute next direction - - sk = cache_tau.Get_Point() - cache_xbar.Get_Point(); - yk = cache_tau.Get_FPR() - cache_xbar.Get_FPR(); - - % update iterate - - cache_x = cache_tau; - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,10) == 0) || opt.display >= 3 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr, tau); - end - -end - -time = toc(t0); - -if it == opt.maxit - msgTerm = 'exceeded maximum iterations'; - flagTerm = 1; -end - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -out.x = cache_x.Get_ProxGradStep(); 
-out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -if opt.toRecord, out.record = record; end -out.gam = gam; -out.adaptive = adaptive; -out.time = time; -out.cacheDir = cacheDir; diff --git a/private/zerofpr2.m b/private/zerofpr2.m deleted file mode 100644 index 5bd448e..0000000 --- a/private/zerofpr2.m +++ /dev/null @@ -1,183 +0,0 @@ -function out = zerofpr2(prob, opt, lsopt) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma and sigma - -gam = (1-opt.beta)/Lf; -sig = opt.beta/(4*gam); - -% display header - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', '||d||', 'tau'); -end - -nonmonotone = strcmp(opt.linesearch, 'backtracking-nm'); - -cacheDir.cntSkip = 0; - -msgTerm = 'exceeded maximum iterations'; -flagTerm = 1; - -restart = 0; - -cache_x = FBCache(prob, prob.x0, gam, ops); - -t0 = tic(); - -for it = 1:opt.maxit - - % backtracking on gamma - - if adaptive - [restart, ~] = cache_x.Backtrack_Gamma(opt.beta); - gam = cache_x.Get_Gamma(); - sig = opt.beta/(4*gam); - end - - % trace stuff - - if it == 1 - cache_0 = cache_x; - end - - if opt.report - objective(1, it) = cache_x.Get_FBE(); - residual(1, it) = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - ts(1, it) = toc(t0); - end - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_x); - end - - % check for termination - - if ~restart - if ~opt.customTerm - if cache_x.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, 
cache_0, cache_x); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % compute search direction and slope - - if it == 1 || restart - sk = []; - yk = []; - end - - [dir_QN, ~, cacheDir] = ... - opt.methodfun(prob, opt, it, restart, sk, yk, cache_x.Get_FPR(), cacheDir); - dir_FB = -cache_x.Get_FPR(); - - % perform line search - - tau = 1.0; % this *must* be 1.0 for this line-search to work - cache_x.Set_Directions(dir_QN); - cache_w = cache_x.Get_CacheLine(tau, 1); - ls_ref = cache_x.Get_FBE() - sig*cache_x.Get_NormFPR()^2; - - if ~nonmonotone || it == 1 || restart - lsopt.Q = 1; - lsopt.C = ls_ref; - else - newQ = lsopt.eta*lsopt.Q+1; - lsopt.C = (lsopt.eta*lsopt.Q*lsopt.C + ls_ref)/newQ; - lsopt.Q = newQ; - end - - if cache_w.Get_FBE() > lsopt.C - cache_x.Set_Directions([], dir_FB); - end - while cache_w.Get_FBE() > lsopt.C - if tau <= 1e-3 - % simply do forward-backward step if line-search fails - cache_w = FBCache(prob, cache_x.Get_ProxGradStep(), gam, ops); - % next line is for debugging purposes in case the code reaches this - % cache_xbar = FBCache(prob, cache_x.Get_ProxGradStep(), gam, []); - break; - end - tau = tau/2; - cache_w = cache_x.Get_CacheSegment(tau); - end - - % store pair (s, y) to compute next direction - - sk = cache_w.Get_Point() - cache_x.Get_Point(); - yk = cache_w.Get_FPR() - cache_x.Get_FPR(); - - % update iterate - - cache_x = cache_w; - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,10) == 0) || opt.display >= 3 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr, norm(dir_QN), tau); - end - -end - -time = toc(t0); - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_x.Get_FPR(), 'inf')/cache_x.Get_Gamma(); - obj_curr 
= cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -if it == opt.maxit - out.x = cache_x.Get_Point(); -else - out.x = cache_x.Get_ProxGradStep(); -end -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -if opt.toRecord, out.record = record; end -out.gam = gam; -out.time = time; -out.cacheDir = cacheDir; - -end diff --git a/private/zerofpr_sym.m b/private/zerofpr_sym.m deleted file mode 100644 index 89fd2f5..0000000 --- a/private/zerofpr_sym.m +++ /dev/null @@ -1,170 +0,0 @@ -function out = zerofpr_sym(prob, opt, varargin) - -% initialize output stuff - -if opt.report - residual = zeros(1, opt.maxit); - objective = zeros(1, opt.maxit); - ts = zeros(1, opt.maxit); - % initialize operations counter - ops = FBOperations(); -else - ops = []; -end - -% get Lipschitz constant & adaptiveness - -[Lf, adaptive] = prob.Get_Lipschitz(opt); - -% initialize gamma and sigma - -gam = (1-opt.beta)/Lf; -sig = opt.beta/(4*gam); - -% display header - -if opt.display >= 2 - fprintf('\n%s', opt.name); - fprintf('\n%6s%11s%11s%11s%11s%11s\n', 'iter', 'gamma', 'optim.', 'object.', '||d||', 'tau'); -end - -cacheDir.cntSkip = 0; - -msgTerm = 'exceeded maximum iterations'; -flagTerm = 1; - -restart = 0; - -cache_x = FBCache(prob, prob.x0, gam, ops); - -t0 = tic(); - -for it = 1:opt.maxit - - % backtracking on gamma - - if adaptive - [restart, ~] = cache_x.Backtrack_Gamma(opt.beta); - gam = cache_x.Get_Gamma(); - sig = opt.beta/(4*gam); - end - - % trace stuff - - if it == 1 - cache_0 = cache_x; - end - - if opt.report - objective(1, it) = cache_x.Get_FBE(); - residual(1, it) = norm(cache_x.Get_FPR(), 'inf')/gam; - ts(1, it) = toc(t0); - end - if opt.toRecord - record(:, it) = opt.record(prob, it, cache_0, cache_x); - end - - % check for termination - 
- if ~restart - if ~opt.customTerm - if cache_x.Check_StoppingCriterion(opt.tol) - msgTerm = 'reached optimum (up to tolerance)'; - flagTerm = 0; - break; - end - else - flagStop = opt.term(prob, it, cache_0, cache_x); - if (adaptive == 0 || it > 1) && flagStop - msgTerm = 'reached optimum (custom criterion)'; - flagTerm = 0; - break; - end - end - end - - % compute search direction and slope - - cache_xbar = FBCache(prob, cache_x.Get_ProxGradStep(), gam, ops); - rtilde = cache_x.Get_FPR() + gam*(cache_xbar.Get_Gradf() - cache_x.Get_Gradf()); - - if it == 1 || restart - sk = []; - yk = []; - else - % store pair (s, y) to compute next direction - sk = cache_x.Get_Point() - cache_xprev.Get_Point(); - yk = rtilde - rtilde_prev; - end - - [dir_QN, ~, cacheDir] = ... - opt.methodfun(prob, opt, it, restart, sk, yk, rtilde, cacheDir); - dir_FB = -cache_x.Get_FPR(); - - % perform line search - - tau = 1.0; % this *must* be 1.0 for this line-search to work - cache_xbar.Set_Directions(dir_QN - dir_FB); - cache_w = cache_xbar.Get_CacheLine(tau, 1); - ls_ref = cache_x.Get_FBE() - sig*cache_x.Get_NormFPR()^2; - while cache_w.Get_FBE() > ls_ref - if tau <= 1e-3 - % simply accept forward-backward step if line-search fails - cache_w = cache_xbar; - break; - end - tau = tau/2; - cache_w = cache_xbar.Get_CacheLine(tau, 1); - end - - % update iterate - - rtilde_prev = rtilde; - cache_xprev = cache_x; - cache_x = cache_w; - - % display stuff - - if opt.display == 1 - Util_PrintProgress(it); - elseif (opt.display == 2 && mod(it,10) == 0) || opt.display >= 3 - res_curr = norm(cache_x.Get_FPR(), 'inf')/gam; - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e %7.4e %7.4e\n', it, gam, res_curr, obj_curr, norm(dir_QN), tau); - end - -end - -time = toc(t0); - -if opt.display == 1 - Util_PrintProgress(it, flagTerm); -elseif opt.display >= 2 - res_curr = norm(cache_x.Get_FPR(), 'inf')/gam; - obj_curr = cache_x.Get_FBE(); - fprintf('%6d %7.4e %7.4e %7.4e\n', it, gam, res_curr, 
obj_curr); -end - -% pack up results - -out.name = opt.name; -out.message = msgTerm; -out.flag = flagTerm; -if it == opt.maxit - out.x = cache_x.Get_Point(); -else - out.x = cache_x.Get_ProxGradStep(); -end -out.iterations = it; -out.operations = ops; -if opt.report - out.residual = residual(1, 1:it); - out.objective = objective(1, 1:it); - out.ts = ts(1, 1:it); -end -if opt.toRecord, out.record = record; end -out.gam = gam; -out.time = time; -out.cacheDir = cacheDir; - -end diff --git a/tests/test_CheckGamma.m b/tests/test_CheckGamma.m deleted file mode 100755 index cda1801..0000000 --- a/tests/test_CheckGamma.m +++ /dev/null @@ -1,58 +0,0 @@ -A = [1, 2, 3, 4; 2, 3, 4, 5; 3, 4, 5, 6]; -b = [1; 2; 3]; - -[m, n] = size(A); - -%% Lasso - -f = quadLoss(1, zeros(m,1)); -lam = 1.0; -g = l1Norm(lam); -x0 = ones(n, 1); - -prob = ProblemComposite(f, A, -b, [], [], [], g, [], [], x0); - -ops = FBOperations(); - -Lf = norm(A)^2; - -gam = 10/Lf; -bet = 0.05; - -cache = FBCache(prob, x0, gam, ops); -[flag, ~] = cache.Check_Gamma(bet); - -assert(flag == 0); - -gam = 0.9/Lf; - -cache.Set_Gamma(gam); -[flag, ~] = cache.Check_Gamma(bet); - -assert(flag == 1); - -%% Sparse logistic regression - -f = logLoss(3.0); -lam = 10.0; -g = l1Norm(lam); -x0 = ones(n, 1); - -prob = ProblemComposite([], [], [], f, A, -b, g, [], [], x0); - -ops = FBOperations(); - -gam = 100.0/600; -bet = 0.05; - -cache = FBCache(prob, x0, gam, ops); -[flag, ~] = cache.Check_Gamma(bet); - -assert(flag == 0); - -gam = 1.0/600; - -cache.Set_Gamma(gam); -[flag, ~] = cache.Check_Gamma(bet); - -assert(flag == 1); diff --git a/tests/test_LineFBE.m b/tests/test_LineFBE.m deleted file mode 100644 index a5d38f1..0000000 --- a/tests/test_LineFBE.m +++ /dev/null @@ -1,63 +0,0 @@ -close all; -clear; - -NUM_TOL_VAL = 1e-8; -NUM_TOL_DER = 1e-8; - -A = [1, 2, 3, 4; 2, 3, 4, 5; 3, 4, 5, 6]; -b = [1; 2; 3]; - -[m, n] = size(A); - -%% Lasso - -f = quadLoss(1, zeros(m,1)); -lam = 1.0; -g = l1Norm(lam); -x0 = ones(n, 1); - -prob = 
ProblemComposite(f, A, -b, [], [], [], g, [], [], x0); -ops = FBOperations(); - -gams = [10.0/200, 5.0/200, 2.0/200, 1.0/200]; - -for ix = 1:10 % try several starting points - -x = randn(n, 1); - -for igam = 1:length(gams) - -gam = gams(igam); -cache = FBCache(prob, x, gam, ops); - -for idir = 1:10 % try several random directions - -dir = randn(n, 1); -cache.Set_Directions(dir); - -taus = [1.0, 0.5, 0.25, 0.125]; - -for itau = 1:length(taus) - - tau = taus(itau); - - cache_1 = cache.Get_CacheLine(tau, 1); - cache_2 = FBCache(prob, x+tau*dir, gam, ops); - - assert(abs(cache_1.Get_FBE() - cache_2.Get_FBE())/abs(cache_2.Get_FBE()) <= NUM_TOL_VAL); - - cache_1 = cache.Get_CacheLine(tau, 2); - gradFBE = cache_2.Get_GradFBE(); - slope = gradFBE'*dir; - - assert(abs(cache_1.dFBE - slope)/abs(slope) <= NUM_TOL_DER); - - cache_1 = cache.Get_CacheLine(tau, 3); - - assert(abs(cache_1.Get_FBE() - cache_2.Get_FBE())/abs(cache_2.Get_FBE()) <= NUM_TOL_VAL); - assert(abs(cache_1.Get_Slope() - slope)/abs(slope) <= NUM_TOL_DER); - -end -end -end -end diff --git a/tests/test_MakeProblem.m b/tests/test_MakeProblem.m deleted file mode 100644 index f5e1964..0000000 --- a/tests/test_MakeProblem.m +++ /dev/null @@ -1,22 +0,0 @@ -close all; -clear; - -A = randn(15,10); -Q = A*A'; -q = randn(15, 1); -f = quadratic(Q, q); -g = indBox(-1, 1); -x0 = randn(15,1); -opt.tol = 1e-8; -opt.display = 0; -out = forbes(f, g, x0, [], [], opt); - -fs = {quadLoss(), logLoss(), quadLoss()}; -gs = {indPos(), l1Norm()}; -aff = { randn(5, 3), randn(5, 6), zeros(5, 1); - randn(7, 3), randn(7, 6), randn(7, 1); - randn(9, 3), randn(9, 6), -ones(9, 1);}; -x0 = randn(9,1); -opt.tol = 1e-8; -opt.display = 0; -out = forbes(fs, gs, x0, aff, [], opt); diff --git a/tests/test_SegmentFBE.m b/tests/test_SegmentFBE.m deleted file mode 100644 index 5143d73..0000000 --- a/tests/test_SegmentFBE.m +++ /dev/null @@ -1,58 +0,0 @@ -close all; -clear; - -NUM_TOL_VAL = 1e-8; -NUM_TOL_DER = 1e-8; - -A = [1, 2, 3, 4; 2, 3, 4, 5; 3, 
4, 5, 6]; -b = [1; 2; 3]; - -[m, n] = size(A); - -%% Lasso - -f = quadLoss(1, zeros(m,1)); -lam = 1.0; -g = l1Norm(lam); -x0 = ones(n, 1); - -prob = ProblemComposite(f, A, -b, [], [], [], g, [], [], x0); -ops = FBOperations(); - -gams = [10.0/200, 5.0/200, 2.0/200, 1.0/200]; - -for ix = 1:10 % try several starting points - -x = randn(n, 1); - -for igam = 1:length(gams) - -gam = gams(igam); -cache = FBCache(prob, x, gam, ops); - -for idir = 1:10 % try several random directions - -dir1 = randn(n, 1); -dir2 = -cache.Get_FPR(); -cache.Set_Directions(dir1); -cache.Set_Directions([], dir2); - -taus = [1.0, 0.5, 0.25, 0.125]; - -for itau = 1:length(taus) - - tau = taus(itau); - - cache_1 = cache.Get_CacheSegment(tau); - cache_2 = FBCache(prob, x+tau*dir1+(1-tau)*dir2, gam, ops); - - assert(abs(cache_1.Get_FBE() - cache_2.Get_FBE())/abs(cache_2.Get_FBE()) <= NUM_TOL_VAL); - -end - -cache_1 = cache.Get_CacheSegment(0.0); -assert(norm(cache_1.Get_Point() - cache.Get_ProxGradStep(), inf) <= 1e-12); - -end -end -end diff --git a/tests/test_SolveLasso_random.m b/tests/test_SolveLasso_random.m deleted file mode 100644 index 9990f7d..0000000 --- a/tests/test_SolveLasso_random.m +++ /dev/null @@ -1,51 +0,0 @@ -close all; -clear; - -% rng(0, 'twister'); - -m = 50; -n = 200; - -A = randn(m, n); -b = randn(m, 1); - -f = quadLoss(); -aff = {A, -b}; -lam = 0.3*norm(A'*b, 'inf'); -g = l1Norm(lam); -x0 = zeros(n, 1); - -ASSERT_TOL = 1e-6; - -baseopt.display = 0; -baseopt.tol = 1e-6; -baseopt.maxit = 10000; - -opt_fbs = baseopt; opt_fbs.solver = 'fbs'; -out_fbs = forbes(f, g, x0, aff, [], opt_fbs); - -assert(out_fbs.solver.iterations < baseopt.maxit); - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 
'broyden'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, x0, aff, [], opts{i}); - assert(outs{i}.flag == 0); - assert(norm(outs{i}.x - out_fbs.x,inf)/(1+norm(out_fbs.x,inf)) <= ASSERT_TOL); - fprintf('.'); -end diff --git a/tests/test_SolveLasso_small.m b/tests/test_SolveLasso_small.m deleted file mode 100644 index 7f346ff..0000000 --- a/tests/test_SolveLasso_small.m +++ /dev/null @@ -1,58 +0,0 @@ -close all; -clear; - -A = [1, 2, -1, -1; ... - -2, -1, 0, -1; ... - 3, 0, 4, -1; ... - -4, -1, -3, 1; ... 
- 5, 3, 2, 3]'; -b = [1, 2, 3, 4]'; - -[m, n] = size(A); - -f = quadLoss(); -aff = {A, -b}; -lam = 0.1*norm(A'*b, 'inf'); -g = l1Norm(lam); -x0 = zeros(n, 1); - -x_star = [-3.877278911564627e-01; 0; 0; 2.174149659863943e-02; 6.168435374149660e-01]; - -ASSERT_TOL = 1e-6; - -baseopt.display = 2; -baseopt.tol = 1e-8; -baseopt.maxit = 10000; - -opt_fbs = baseopt; opt_fbs.solver = 'fbs'; -out_fbs = forbes(f, g, x0, aff, [], opt_fbs); - -assert(out_fbs.solver.iterations < baseopt.maxit); - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'broyden'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; 
opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, x0, aff, [], opts{i}); - assert(outs{i}.flag == 0); - assert(norm(outs{i}.x - out_fbs.x,inf)/(1+norm(out_fbs.x,inf)) <= ASSERT_TOL); - fprintf('.'); -end diff --git a/tests/test_SolveNuclearNormMC_random.m b/tests/test_SolveNuclearNormMC_random.m deleted file mode 100644 index 5ce3392..0000000 --- a/tests/test_SolveNuclearNormMC_random.m +++ /dev/null @@ -1,59 +0,0 @@ -close all; -clear; - -% rng(0, 'twister'); % uncomment this to control the random number generator - -m = 30; % number of rows -n = 30; % number of column of the original matrix M -d = 0.5; % density of coefficients sampled from M -r = 3; % rank of M - -U = randn(m, r); -V = randn(n, r); -M = U*V'; - -P = sprand(m, n, d) ~= 0; % sampling pattern -B = full(M.*P); - -f = quadLoss(P(:), B(:)); -lam = 2; -g = nuclearNorm(m, n, lam, 'inexact'); -x0 = zeros(m*n, 1); - -ASSERT_TOL = 1e-5; - -%% run methods - -baseopt.display = 0; -baseopt.tol = 1e-6; -baseopt.maxit = 1000; -baseopt.Lf = 1; - -opt_fbs = baseopt; opt_fbs.solver = 'fbs'; opt_fbs.variant = 'basic'; -out_fbs = forbes(f, g, x0, [], [], opt_fbs); - -assert(out_fbs.solver.iterations < baseopt.maxit); - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; 
opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-nm'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking-nm'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, x0, [], [], opts{i}); - assert(outs{i}.solver.iterations < opts{i}.maxit); - assert(norm(outs{i}.x - out_fbs.x,inf)/(1+norm(out_fbs.x,inf)) <= ASSERT_TOL); - fprintf('.'); -end diff --git a/tests/test_SolveQP_random.m b/tests/test_SolveQP_random.m deleted file mode 100644 index fe95ad7..0000000 --- a/tests/test_SolveQP_random.m +++ /dev/null @@ -1,58 +0,0 @@ -close all; -clear; - -n = 20; -m = 60; -densQ = 0.2; -densA = 0.2; -act = 3; - -Q = sprandsym(n, densQ, 1, 1)+1e-1*speye(n); -A = sprandn(m, n, densA); -x_star = randn(n, 1); -y_star = [rand(act, 1); zeros(m-act, 1)]; -q = -Q*x_star - A'*y_star; -b = [A(1:act,:)*x_star; A(act+1:end,:)*x_star + rand(m-act,1)]; -f_star = 0.5*(x_star'*(Q*x_star)) + q'*x_star; - -f = quadratic(Q, q); -g = 
indPos(); -constr = {A, 1, b}; -y0 = zeros(m, 1); - -TOL = 1e-6; -ASSERT_TOLX = 1e-4; -ASSERT_TOLF = 1e-6; - -% run solvers - -baseopt.display = 0; -baseopt.adaptive = 0; -baseopt.maxit = 10000; -baseopt.tol = TOL; - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, y0, [], constr, opts{i}); - assert(outs{i}.flag == 0); - assert(norm(A*outs{i}.x1 + outs{i}.z - b, 'inf') <= 10*TOL); - assert(abs(outs{i}.solver.objective(end) + f_star)/(1+abs(f_star)) <= ASSERT_TOLF); - assert(norm(outs{i}.x1 - x_star, 
inf)/(1+norm(x_star, inf)) <= ASSERT_TOLX); - fprintf('.'); -end diff --git a/tests/test_SolveRankConstrMC_random.m b/tests/test_SolveRankConstrMC_random.m deleted file mode 100644 index 0e33403..0000000 --- a/tests/test_SolveRankConstrMC_random.m +++ /dev/null @@ -1,48 +0,0 @@ -close all; -clear; - -% rng(0, 'twister'); % uncomment this to control the random number generator - -m = 30; % number of rows -n = 30; % number of column of the original matrix M -d = 0.5; % density of coefficients sampled from M -r = 5; % rank of M -r_target = 3; - -U = randn(m, r); -V = randn(n, r); -M = U*V'; - -P = sprand(m, n, d) ~= 0; % sampling pattern -B = full(M.*P); - -f = quadLoss(P(:), B(:)); -g = indRankBall(m, n, r_target); -x0 = zeros(m*n, 1); - -ASSERT_TOL = 1e-5; - -%% run methods - -baseopt.display = 0; -baseopt.tol = 1e-6; -baseopt.maxit = 1000; -baseopt.Lf = 1; -baseopt.report = 1; - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-nm'; -% opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking'; -% opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbroyden'; opts{end}.linesearch = 'backtracking-nm'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'rbroyden'; opts{end}.linesearch = 'backtracking-nm'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, x0, [], [], opts{i}); - assert(outs{i}.solver.iterations < opts{i}.maxit); - assert(norm(outs{i}.solver.residual(end), 'inf') <= ASSERT_TOL); - fprintf('.'); -end diff --git a/tests/test_SolveSVM_random.m b/tests/test_SolveSVM_random.m deleted file mode 100644 index 
c070344..0000000 --- a/tests/test_SolveSVM_random.m +++ /dev/null @@ -1,66 +0,0 @@ -close all; -clear; - -% solve a small SVM problem via the dual QP using quadprog -% then compare with the solution found with ForBES - -n = 2100; % number of features (= number of variables minus one) -m = 130; % number of samples - -w = randn(n, 1); % N(0,1), 30% sparse - -A = randn(m, n); -btrue = sign(A*w); - -% noise is function of problem size use 0.1 for large problem -b = sign(btrue + sqrt(0.1)*randn(m,1)); % labels with noise -mu = 1.0; - -% solve dual problem using QUADPROG -BA = diag(sparse(b))*A; -Q = BA*BA'; -q = ones(size(A, 1), 1); -opt_qp = optimoptions('quadprog','Display','off'); -[lambda_qp, fval_qp, flag_qp, output_qp] = quadprog(Q, -q, [], [], [], [], zeros(m, 1), mu*ones(m, 1), [], opt_qp); -x_qp = BA'*lambda_qp; - -f = quadLoss(); -g = hingeLoss(mu, b); -constr = {A, -1, zeros(m, 1)}; -y0 = zeros(m, 1); - -TOL = 1e-8; -ASSERT_TOLX = 1e-4; -ASSERT_TOLF = 1e-6; - -%% adaptive - -baseopt.display = 0; -baseopt.adaptive = 1; -baseopt.maxit = 10000; -baseopt.tol = TOL; - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'basic'; -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 
'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, y0, [], constr, opts{i}); - assert(outs{i}.flag == 0); - assert(norm(A*outs{i}.x1 - outs{i}.z, 'inf') <= 10*TOL); - assert(abs(outs{i}.solver.objective(end) - fval_qp)/(1+abs(fval_qp)) <= ASSERT_TOLF); - assert(norm(outs{i}.x1 - x_qp, inf)/(1+norm(x_qp, inf)) <= ASSERT_TOLX); - fprintf('.'); -end diff --git a/tests/test_SolveSparseLogReg_small.m b/tests/test_SolveSparseLogReg_small.m deleted file mode 100644 index 2f72026..0000000 --- a/tests/test_SolveSparseLogReg_small.m +++ /dev/null @@ -1,55 +0,0 @@ -close all; -clear; - -A = [1, 2, -1, -1; ... - -2, -1, 0, -1; ... - 3, 0, 4, -1; ... - -4, -1, -3, 1; ... 
- 5, 3, 2, 3]'; -b = [1, 2, 3, 4]'; - -[m, n] = size(A); - -f = logLoss(1.0); -aff = {A, -b}; -lam = 0.1; -g = l1Norm(lam); -x0 = zeros(n, 1); - -x_star = [0; 0; 2.114635341704963e-01; 0; 2.845881348733116e+00]; - -ASSERT_TOL = 1e-6; - -%% adaptive - -baseopt.display = 0; -baseopt.tol = 1e-8; -baseopt.maxit = 5000; - -opt_fbs = baseopt; opt_fbs.solver = 'fbs'; opt_fbs.variant = 'basic'; -out_fbs = forbes(f, g, x0, aff, [], opt_fbs); - -assert(out_fbs.solver.iterations < opt_fbs.maxit); - -opts = {}; -outs = {}; - -opts{end+1} = baseopt; opts{end}.solver = 'fbs'; opts{end}.variant = 'fast'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'minfbe'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking-armijo'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'zerofpr'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'bfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'lbfgs'; opts{end}.linesearch = 'backtracking'; -opts{end+1} = baseopt; opts{end}.solver = 'nama'; opts{end}.method = 'broyden'; opts{end}.linesearch = 'backtracking'; - -for i = 1:length(opts) - outs{end+1} = forbes(f, g, x0, aff, [], opts{i}); - assert(outs{i}.flag == 0); - assert(outs{i}.solver.objective(end) - out_fbs.solver.objective(end) <= 
ASSERT_TOL); - assert(norm(outs{i}.x - x_star,inf)/(1+norm(x_star,inf)) <= ASSERT_TOL); - fprintf('.'); -end diff --git a/tests/test_inequalities1.m b/tests/test_inequalities1.m deleted file mode 100644 index 64e4169..0000000 --- a/tests/test_inequalities1.m +++ /dev/null @@ -1,30 +0,0 @@ -close all; -clear; - -N_TESTS = 2000; - -m = 50; -n = 200; -A = randn(m, n); -b = randn(m, 1); -f = quadLoss(); -aff = {A, -b}; -lam = 0.3*norm(A'*b, 'inf'); -g = distBox(-1, 1, lam); -x0 = zeros(n, 1); - -prob = ProblemComposite(f, A, -b, [], [], [], g, [], [], x0); -opt.adaptive = 0; -opt.beta = 0.05; -[Lf, adaptive] = prob.Get_Lipschitz(opt); -gam = (1-opt.beta)/Lf; -ops = FBOperations(); - -for i = 1:N_TESTS - x = 3*randn(n, 1); - cache_x = FBCache(prob, x, gam, ops); - z = cache_x.Get_ProxGradStep(); - cache_z = FBCache(prob, z, gam, ops); - assert(cache_z.Get_f() + cache_x.gz <= cache_x.Get_FBE() - opt.beta/(2*gam)*cache_x.Get_NormFPR()^2 + abs(cache_x.Get_FBE())*1e-12); - assert(cache_z.Get_FBE() <= cache_z.Get_f() + cache_x.gz - 1/(2*gam)*cache_z.Get_NormFPR()^2 + abs(cache_z.Get_FBE())*1e-12); -end diff --git a/tests/test_inequalities2.m b/tests/test_inequalities2.m deleted file mode 100644 index 90142ce..0000000 --- a/tests/test_inequalities2.m +++ /dev/null @@ -1,36 +0,0 @@ -close all; -clear; - -N_TESTS = 2000; - -% dense Q - -n = 10; -m = 20; -Q = randn(n,n); -Q = 0.5*(Q'+Q); -Q = Q + (1e-1 - min(eig(Q)))*eye(n); -q = randn(n, 1); -A = randn(m, n); -low = -0.2; hi = 0.2; -f = quadratic(Q, q); -% g = distBox(low, hi); -g = l1Norm(); - -y0 = randn(m, 1); - -prob = ProblemComposite(conjugate(f), -A', [], [], [], [], conjugate(g), 1, [], y0); -opt.adaptive = 0; -opt.beta = 0.05; -[Lf, adaptive] = prob.Get_Lipschitz(opt); -gam = (1-opt.beta)/Lf; -ops = FBOperations(); - -for i = 1:N_TESTS - x = randn(m, 1); - cache_x = FBCache(prob, x, gam, ops); - z = cache_x.Get_ProxGradStep(); - cache_z = FBCache(prob, z, gam, ops); - assert(cache_z.Get_f() + cache_x.gz <= 
cache_x.Get_FBE() - opt.beta/(2*gam)*cache_x.Get_NormFPR()^2 + abs(cache_x.Get_FBE())*1e-12); - assert(cache_z.Get_FBE() <= cache_z.Get_f() + cache_x.gz - 1/(2*gam)*cache_z.Get_NormFPR()^2 + abs(cache_z.Get_FBE())*1e-12); -end diff --git a/tests/test_lqrCost.m b/tests/test_lqrCost.m deleted file mode 100644 index c7a8ff2..0000000 --- a/tests/test_lqrCost.m +++ /dev/null @@ -1,90 +0,0 @@ -close all; -clear; - -ASSERT_EPS = 1e-10; -N_TESTS = 20; - -% Generate problem - -n_x = 6; -n_u = 2; - -A = randn(n_x, n_x); A = 2*(A/norm(A)); -B = randn(n_x, n_u); B = B/norm(B); -Q = 1*eye(n_x); -R = 1e-1*eye(n_u); -Q_N = 1*Q; -N = 10; -x0 = randn(n_x, 1); - -% Build up big constraint/cost matrices, so as to be able to -% compute f conjugate and its gradient via some QP solver -% -% f(x) = (1/2)(x'*H*x) + ind{A*x = b} -% -% fc(y) = sup_x { y'*x - f(x) } -% => minimize_x f(x) - y'*x s.t. A*x = b -% => obtain x_sol = grad_fc_y -% => compute fc(y) = y'*x_sol - f(x_sol) - -block_eq = [A, B, -eye(n_x)]; -A_eq = sparse((N+1)*n_x, (N+1)*n_x + N*n_u); -diag_H = {}; -for i = 0:N-1 % build up constraints - basei = i*n_x; - basej = i*(n_x+n_u); - A_eq(basei+1:basei+n_x, basej+1:basej+2*n_x+n_u) = block_eq; - diag_H{2*i+1} = Q; - diag_H{2*i+2} = R; -end -diag_H{2*N+1} = Q_N; -H = blkdiag(diag_H{:}); -b_eq = zeros((N+1)*n_x, 1); - -% Test with no reference (zero reference) - -f = lqrCost(x0, Q, R, Q_N, A, B, N); -call_fc = f.makefconj(); - -for i=1:N_TESTS - y = randn(N*(n_x+n_u)+n_x, 1); - [fc_y, grad_fc_y] = call_fc(y); - % test conjugate subgradient theorem - fx = 0.5*(grad_fc_y'*H*grad_fc_y); - assert(abs(grad_fc_y'*y - fx - fc_y) <= 1e-12*(1+abs(fc_y))); - % evaluate gradient numerically - grad_fc_y_num = numdiff(call_fc, y); - assert(norm(grad_fc_y-grad_fc_y_num, 'inf') <= 1e-6*(1+norm(grad_fc_y, 'inf'))); - % evaluate by solving a QP -% opt_qp = optimoptions('quadprog','Display','off'); -% [grad_fc_y_qp] = quadprog(H,-y,[],[],A_eq,b_eq,[],[],[],opt_qp); -% fc_y_qp = y'*grad_fc_y_qp - 
0.5*(grad_fc_y_qp'*H*grad_fc_y_qp); - % test equivalence -% assert(abs(fc_y-fc_y_qp) <= 1e-8); -% assert(norm(grad_fc_y-grad_fc_y_qp, 'inf') <= 1e-8); -end - -% Test with reference state - -xref = randn(n_x, 1); -tran = [repmat([Q*xref; zeros(n_u, 1)], N, 1); Q_N*xref]; -f = lqrCost(x0, Q, R, Q_N, A, B, N, xref); -call_fc = f.makefconj(); - -for i=1:N_TESTS - y = randn(N*(n_x+n_u)+n_x, 1); - [fc_y, grad_fc_y] = call_fc(y); - % test conjugate subgradient theorem - fx = 0.5*((grad_fc_y-tran)'*H*(grad_fc_y-tran)); - assert(abs(grad_fc_y'*y - fx - fc_y) <= 1e-12*(1+abs(fc_y))); - % evaluate gradient numerically - grad_fc_y_num = numdiff(call_fc, y); - assert(norm(grad_fc_y-grad_fc_y_num, 'inf') <= 1e-6*(1+norm(grad_fc_y, 'inf'))); - % evaluate by solving a QP -% opt_qp = optimoptions('quadprog','Display','off'); -% [grad_fc_y_qp] = quadprog(H,-y,[],[],A_eq,b_eq,[],[],[],opt_qp); -% fc_y_qp = y'*grad_fc_y_qp - 0.5*(grad_fc_y_qp'*H*grad_fc_y_qp); - % test equivalence -% assert(abs(fc_y-fc_y_qp) <= 1e-8); -% assert(norm(grad_fc_y-grad_fc_y_qp, 'inf') <= 1e-8); -end diff --git a/tests/test_quadratic.m b/tests/test_quadratic.m deleted file mode 100644 index 914e4cf..0000000 --- a/tests/test_quadratic.m +++ /dev/null @@ -1,50 +0,0 @@ -close all; -clear; - -ASSERT_EPS = 1e-10; -N_TESTS = 20; - -% dense Q - -n = 100; -Q = rand(n,n); -Q = Q'*Q + 1e-1*eye(n); -q = randn(n, 1); - -f = quadratic(Q, q); - -call_f = f.makef(); -call_fc = f.makefconj(); - -for i = 1:N_TESTS - x = randn(n, 1); - y = randn(n, 1); - [f_x, grad_f_x] = call_f(x); - [fc_y, grad_fc_y] = call_fc(y); - assert(f_x + fc_y >= x'*y); - assert(abs(f_x + call_fc(grad_f_x) - x'*grad_f_x) <= ASSERT_EPS); - assert(abs(fc_y + call_f(grad_fc_y) - grad_fc_y'*y) <= ASSERT_EPS); -end - -% sparse Q - -n = 500; -dens = 0.1; -rc = 0.1; -Q = sprandsym(n, dens, rc, 1) + 1e-1*speye(n); -q = randn(n, 1); - -f = quadratic(Q, q); - -call_f = f.makef(); -call_fc = f.makefconj(); - -for i = 1:N_TESTS - x = randn(n, 1); - y = randn(n, 
1); - [f_x, grad_f_x] = call_f(x); - [fc_y, grad_fc_y] = call_fc(y); - assert(f_x + fc_y >= x'*y); - assert(abs(f_x + call_fc(grad_f_x) - x'*grad_f_x) <= ASSERT_EPS); - assert(abs(fc_y + call_f(grad_fc_y) - grad_fc_y'*y) <= ASSERT_EPS); -end diff --git a/tests/test_separableSum.m b/tests/test_separableSum.m deleted file mode 100644 index b35c769..0000000 --- a/tests/test_separableSum.m +++ /dev/null @@ -1,47 +0,0 @@ -close all; -clear; - -ASSERT_EPS = 1e-14; - -% two simple functions - -w = rand(10,1); -p = randn(10,1); -f1 = quadLoss(w, p); % R^10 -> R - -mu = 1; -f2 = logLoss(mu); % R^? -> R - -fSum = separableSum({f1, f2}, {10, 20}); % R^30 -> R -callfSum = fSum.makef(); - -for i = 1:100 - x = randn(30, 1); - [v1, grad1] = callfSum(x); - v2 = 0.5*((w.*(x(1:10)-p))'*(x(1:10)-p)) + mu*sum(log(1+exp(-x(11:end)))); - grad2 = [vec(w.*(x(1:10)-p)); -mu*exp(-x(11:end))./(1+exp(-x(11:end)))]; - assert(abs(v1-v2)/(1+abs(v2)) <= ASSERT_EPS); - assert(norm(grad1-grad2, inf)/(1+norm(grad2, inf)) <= ASSERT_EPS); -end - -% two less simple functions - -p = randn(20,30); -f1 = quadLoss(1, p); % R^{20x30} -> R - -mu = 1; -f2 = logLoss(mu); % R^? 
-> R - -fSum = separableSum({f1, f2}, {[20, 30], 50}); % R^650 -> R -callfSum = fSum.makef(); - -for i = 1:100 - x = randn(650, 1); - [v1, grad1] = callfSum(x); - v2 = 0.5*norm(reshape(x(1:600), 20, 30)-p,'fro')^2 + mu*sum(log(1+exp(-x(601:end)))); - grad2 = [vec(reshape(x(1:600), 20, 30)-p); -mu*exp(-x(601:end))./(1+exp(-x(601:end)))]; - assert(norm(v1-v2, inf)/(1+abs(v2)) <= ASSERT_EPS); - assert(norm(grad1-grad2, inf)/(1+norm(grad2, inf)) <= ASSERT_EPS); -end - -% composition with affine mappings diff --git a/tests/test_stackOp.m b/tests/test_stackOp.m deleted file mode 100644 index a25d1e2..0000000 --- a/tests/test_stackOp.m +++ /dev/null @@ -1,62 +0,0 @@ -close all; -clear; - -ASSERT_EPS = 1e-14; - -% two simple operators - -diag1 = randn(5,1); -op1 = diagOp(5, diag1); % R^5 -> R^5 -mat1 = diag(diag1); - -mat2 = randn(3, 5); -op2 = matOp(mat2); % R^5 -> R^3 - -opsStack = stackOp({op1, op2}); % R^5 -> R^8 -callOpsStack = opsStack.makeop(); - -for i = 1:100 - x = randn(5, 1); - y1 = callOpsStack(x); - y2 = [mat1*x; mat2*x]; - assert(norm(y1-y2, inf) <= ASSERT_EPS); -end - -callOpsStackAdj = opsStack.makeadj(); - -for i = 1:100 - y = randn(8, 1); - x1 = callOpsStackAdj(y); - x2 = [mat1', mat2']*y; - assert(norm(x1-x2, inf) <= ASSERT_EPS); -end - -% two less simple operators - -diag1 = randn(2,4); -op1 = diagOp([2,4], diag1); % R^{2x4} -> R^{2x4} - -diag2 = randn(2,4); -op2 = diagOp([2,4], diag2); % R^{2x4} -> R^{2x4} - -opsStack = stackOp({op1, op2}); % R^{2x4} -> R^{16} -assert(all(opsStack.n == [2, 4])); -assert(all(opsStack.m == [16, 1])); -callOpsStack = opsStack.makeop(); - -for i = 1:100 - x = randn(2, 4); - y1 = callOpsStack(x); - y2 = [diag1.*x, diag2.*x]; - y2 = y2(:); - assert(norm(y1-y2, inf) <= ASSERT_EPS); -end - -callOpsStackAdj = opsStack.makeadj(); - -for i = 1:100 - y = randn(16, 1); - x1 = callOpsStackAdj(y); - x2 = reshape(diag1(:).*y(1:8) + diag2(:).*y(9:16), 2, 4); - assert(norm(x1-x2, inf) <= ASSERT_EPS); -end diff --git a/tests/test_sumOp.m 
b/tests/test_sumOp.m deleted file mode 100644 index d0c287f..0000000 --- a/tests/test_sumOp.m +++ /dev/null @@ -1,62 +0,0 @@ -close all; -clear; - -ASSERT_EPS = 1e-14; - -% two simple operators - -diag1 = randn(5,1); -op1 = diagOp(5, diag1); % R^5 -> R^5 -mat1 = diag(diag1); - -mat2 = randn(5, 3); -op2 = matOp(mat2); % R^3 -> R^5 - -opsSum = sumOp({op1, op2}); % R^8 -> R^5 -callOpsSum = opsSum.makeop(); - -for i = 1:100 - x = randn(8, 1); - y1 = callOpsSum(x); - y2 = [mat1, mat2]*x; - assert(norm(y1-y2, inf) <= ASSERT_EPS); -end - -callOpsSumAdj = opsSum.makeadj(); - -for i = 1:100 - y = randn(5, 1); - x1 = callOpsSumAdj(y); - x2 = [mat1'*y; mat2'*y]; - assert(norm(x1-x2, inf) <= ASSERT_EPS); -end - -% two less simple operators - -diag1 = randn(2,4); -op1 = diagOp([2,4], diag1); % R^{2x4} -> R^{2x4} - -diag2 = randn(2,4); -op2 = diagOp([2,4], diag2); % R^{2x4} -> R^{2x4} - -opsSum = sumOp({op1, op2}); % R^16 -> R^{2x4} -assert(all(opsSum.n == [16, 1])); -assert(all(opsSum.m == [2, 4])); -callOpsSum = opsSum.makeop(); - -for i = 1:100 - x = randn(16, 1); - y1 = callOpsSum(x); - y2 = diag1.*reshape(x(1:8), 2, 4) + diag2.*reshape(x(9:16), 2, 4); - assert(norm(y1-y2, inf) <= ASSERT_EPS); -end - -callOpsSumAdj = opsSum.makeadj(); - -for i = 1:100 - y = randn(2, 4); - x1 = callOpsSumAdj(y); - x2 = [diag1.*y, diag2.*y]; - x2 = x2(:); - assert(norm(x1-x2, inf) <= ASSERT_EPS); -end