Skip to content

Commit 184f792

Browse files
committed
reuse initial full proposal step in laplace
1 parent c99fbcf commit 184f792

File tree

4 files changed

+251
-59
lines changed

4 files changed

+251
-59
lines changed

stan/math/mix/functor/laplace_marginal_density_estimator.hpp

Lines changed: 54 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ struct NewtonState {
342342
/** @brief Status of the most recent Wolfe line search */
343343
WolfeStatus wolfe_status;
344344

345+
/** @brief Cached proposal evaluated before the Wolfe line search. */
346+
WolfeData proposal;
347+
345348
/** @brief Workspace vector: b = W * theta + grad(log_lik) */
346349
Eigen::VectorXd b;
347350

@@ -377,6 +380,7 @@ struct NewtonState {
377380
: wolfe_info(std::forward<ObjFun>(obj_fun), covariance.llt().solve(theta_init),
378381
std::forward<ThetaInitializer>(theta_init),
379382
std::forward<ThetaGradFun>(theta_grad_f)),
383+
proposal(theta_size),
380384
b(theta_size),
381385
B(theta_size, theta_size),
382386
prev_g(theta_size) {
@@ -407,9 +411,12 @@ struct NewtonState {
407411
*/
408412
const auto& prev() const& { return wolfe_info.prev_; }
409413
auto&& prev() && { return std::move(wolfe_info).prev(); }
414+
auto& proposal_step() & { return proposal; }
415+
const auto& proposal_step() const& { return proposal; }
416+
auto&& proposal_step() && { return std::move(proposal); }
410417
template <typename Options>
411418
inline void update_next_step(const Options& options) {
412-
this->prev().update(this->curr());
419+
this->prev().swap(this->curr());
413420
this->curr().alpha()
414421
= std::clamp(this->curr().alpha(), 0.0, options.line_search.max_alpha);
415422
}
@@ -485,7 +492,8 @@ struct CholeskyWSolverDiag {
485492
* @tparam LLFun Type of the log-likelihood functor
486493
* @tparam LLTupleArgs Type of the likelihood arguments tuple
487494
* @tparam CovarMat Type of the covariance matrix
488-
* @param[in,out] state Shared Newton state (modified: B, b, curr().a())
495+
* @param[in,out] state Shared Newton state (modified: B, b,
496+
* proposal_step().a())
489497
* @param[in] ll_fun Log-likelihood functor
490498
* @param[in,out] ll_args Additional arguments for the likelihood
491499
* @param[in] covariance Prior covariance matrix Sigma
@@ -521,12 +529,12 @@ struct CholeskyWSolverDiag {
521529

522530
// 3. Factorize B with jittering fallback
523531
llt_with_jitter(llt_B, state.B);
524-
// 4. Solve for curr.a
532+
// 4. Solve for the raw Newton proposal in a-space.
525533
state.b.noalias() = (W_diag.array() * state.prev().theta().array()).matrix()
526534
+ state.prev().theta_grad();
527535
auto L = llt_B.matrixL();
528536
auto LT = llt_B.matrixU();
529-
state.curr().a().noalias()
537+
state.proposal_step().a().noalias()
530538
= state.b
531539
- W_r_diag.asDiagonal()
532540
* LT.solve(
@@ -615,7 +623,8 @@ struct CholeskyWSolverBlock {
615623
* @tparam LLFun Type of the log-likelihood functor
616624
* @tparam LLTupleArgs Type of the likelihood arguments tuple
617625
* @tparam CovarMat Type of the covariance matrix
618-
* @param[in,out] state Shared Newton state (modified: B, b, curr().a())
626+
* @param[in,out] state Shared Newton state (modified: B, b,
627+
* proposal_step().a())
619628
* @param[in] ll_fun Log-likelihood functor
620629
* @param[in,out] ll_args Additional arguments for the likelihood
621630
* @param[in] covariance Prior covariance matrix Sigma
@@ -653,12 +662,12 @@ struct CholeskyWSolverBlock {
653662
// 4. Factorize B with jittering fallback
654663
llt_with_jitter(llt_B, state.B);
655664

656-
// 5. Solve for curr.a
665+
// 5. Solve for the raw Newton proposal in a-space.
657666
state.b.noalias()
658667
= W_block * state.prev().theta() + state.prev().theta_grad();
659668
auto L = llt_B.matrixL();
660669
auto LT = llt_B.matrixU();
661-
state.curr().a().noalias()
670+
state.proposal_step().a().noalias()
662671
= state.b - W_r * LT.solve(L.solve(W_r * (covariance * state.b)));
663672
}
664673

@@ -736,7 +745,7 @@ struct CholeskyKSolver {
736745
* @tparam LLFun Type of the log-likelihood functor
737746
* @tparam LLTupleArgs Type of the likelihood arguments tuple
738747
* @tparam CovarMat Type of the covariance matrix
739-
* @param[in] state Shared Newton state (modified: B, b, curr().a())
748+
* @param[in] state Shared Newton state (modified: B, b, proposal_step().a())
740749
* @param[in] ll_fun Log-likelihood functor
741750
* @param[in] ll_args Additional arguments for the likelihood
742751
* @param[in] covariance Prior covariance matrix Sigma
@@ -763,12 +772,12 @@ struct CholeskyKSolver {
763772
// 3. Factorize B with jittering fallback
764773
llt_with_jitter(llt_B, state.B);
765774

766-
// 4. Solve for curr.a
775+
// 4. Solve for the raw Newton proposal in a-space.
767776
state.b.noalias()
768777
= W_full * state.prev().theta() + state.prev().theta_grad();
769778
auto L = llt_B.matrixL();
770779
auto LT = llt_B.matrixU();
771-
state.curr().a().noalias()
780+
state.proposal_step().a().noalias()
772781
= K_root.transpose().template triangularView<Eigen::Upper>().solve(
773782
LT.solve(L.solve(K_root.transpose() * state.b)));
774783
}
@@ -833,7 +842,7 @@ struct LUSolver {
833842
* @tparam LLFun Type of the log-likelihood functor
834843
* @tparam LLTupleArgs Type of the likelihood arguments tuple
835844
* @tparam CovarMat Type of the covariance matrix
836-
* @param[in,out] state Shared Newton state (modified: b, curr().a())
845+
* @param[in,out] state Shared Newton state (modified: b, proposal_step().a())
837846
* @param[in] ll_fun Log-likelihood functor
838847
* @param[in,out] ll_args Additional arguments for the likelihood
839848
* @param[in] covariance Prior covariance matrix Sigma
@@ -855,10 +864,10 @@ struct LUSolver {
855864
lu.compute(Eigen::MatrixXd::Identity(theta_size, theta_size)
856865
+ covariance * W_full);
857866

858-
// 3. Solve for curr.a
867+
// 3. Solve for the raw Newton proposal in a-space.
859868
state.b.noalias()
860869
= W_full * state.prev().theta() + state.prev().theta_grad();
861-
state.curr().a().noalias()
870+
state.proposal_step().a().noalias()
862871
= state.b - W_full * lu.solve(covariance * state.b);
863872
}
864873

@@ -932,29 +941,32 @@ inline auto run_newton_loop(SolverPolicy& solver, NewtonStateT& state,
932941
solver.solve_step(state, ll_fun, ll_args, covariance,
933942
options.hessian_block_size, msgs);
934943
if (!state.final_loop) {
935-
state.wolfe_info.p_ = state.curr().a() - state.prev().a();
944+
auto&& proposal = state.proposal_step();
945+
state.wolfe_info.p_ = proposal.a() - state.prev().a();
936946
state.prev_g.noalias() = -covariance * state.prev().a()
937947
+ covariance * state.prev().theta_grad();
938948
state.wolfe_info.init_dir_ = state.prev_g.dot(state.wolfe_info.p_);
939949
// Flip direction if not ascending
940950
state.wolfe_info.flip_direction();
941951
auto&& scratch = state.wolfe_info.scratch_;
942-
scratch.alpha() = 1.0;
943-
update_fun(scratch, state.curr(), state.prev(), scratch.eval_,
944-
state.wolfe_info.p_);
945-
// Save the full Newton step objective before the Wolfe line search
946-
// overwrites scratch with intermediate trial points.
947-
const double full_newton_obj = scratch.eval_.obj();
948-
if (scratch.alpha() <= options.line_search.min_alpha) {
949-
state.wolfe_status.accept_ = false;
950-
finish_update = true;
952+
proposal.eval_.alpha() = 1.0;
953+
const bool proposal_valid = update_fun(
954+
proposal, state.curr(), state.prev(), proposal.eval_,
955+
state.wolfe_info.p_);
956+
const bool cached_proposal_ok
957+
= proposal_valid && std::isfinite(proposal.obj())
958+
&& std::isfinite(proposal.dir())
959+
&& proposal.alpha() > options.line_search.min_alpha;
960+
if (!cached_proposal_ok) {
961+
state.wolfe_status
962+
= WolfeStatus{WolfeReturn::StepTooSmall, 1, 0, false};
951963
} else if (options.line_search.max_iterations == 0) {
952-
state.curr().update(scratch);
953-
state.wolfe_status.accept_ = true;
964+
state.curr().update(proposal);
965+
state.wolfe_status = WolfeStatus{WolfeReturn::Continue, 1, 0, true};
954966
} else {
955-
Eigen::VectorXd s = scratch.a() - state.prev().a();
967+
Eigen::VectorXd s = proposal.a() - state.prev().a();
956968
auto full_step_grad
957-
= (-covariance * scratch.a() + covariance * scratch.theta_grad())
969+
= (-covariance * proposal.a() + covariance * proposal.theta_grad())
958970
.eval();
959971
state.curr().alpha() = barzilai_borwein_step_size(
960972
s, full_step_grad, state.prev_g, state.prev().alpha(),
@@ -963,47 +975,30 @@ inline auto run_newton_loop(SolverPolicy& solver, NewtonStateT& state,
963975
state.wolfe_status = internal::wolfe_line_search(
964976
state.wolfe_info, update_fun, options.line_search, msgs);
965977
}
966-
// When the Wolfe line search rejects, don't immediately terminate.
967-
// Instead, let the Newton loop try at least one more iteration.
968-
// The original code compared the stale curr.obj() (which equalled
969-
// prev.obj() after the swap in update_next_step) and would always
970-
// terminate on ANY Wolfe rejection — even on the very first Newton
971-
// step. Now we only declare search_failed if the full Newton step
972-
// itself didn't improve the objective.
973-
bool search_failed;
974-
if (!state.wolfe_status.accept_) {
975-
if (full_newton_obj > state.prev().obj()) {
976-
// The full Newton step (evaluated before Wolfe ran) improved
977-
// the objective. Re-evaluate scratch at the full Newton step
978-
// so we can accept it as the current iterate.
979-
scratch.eval_.alpha() = 1.0;
980-
update_fun(scratch, state.curr(), state.prev(), scratch.eval_,
981-
state.wolfe_info.p_);
982-
state.curr().update(scratch);
983-
state.wolfe_status.accept_ = true;
984-
search_failed = false;
985-
} else {
986-
search_failed = true;
987-
}
988-
} else {
978+
bool search_failed = !state.wolfe_status.accept_;
979+
const bool proposal_armijo_ok
980+
= cached_proposal_ok
981+
&& internal::check_armijo(
982+
proposal.obj(), state.prev().obj(), proposal.alpha(),
983+
state.wolfe_info.init_dir_, options.line_search);
984+
if (search_failed && proposal_armijo_ok) {
985+
state.curr().update(proposal);
986+
state.wolfe_status = WolfeStatus{WolfeReturn::Armijo,
987+
state.wolfe_status.num_evals_,
988+
state.wolfe_status.num_backtracks_,
989+
true};
989990
search_failed = false;
990991
}
991-
/**
992-
* Stop when objective change is small (absolute AND relative), or when
993-
* a rejected Wolfe step fails to improve; finish_update then exits the
994-
* Newton loop.
995-
*/
996-
double obj_change = std::abs(state.curr().obj() - state.prev().obj());
997992
bool objective_converged
998-
= obj_change < options.tolerance
999-
&& obj_change < options.tolerance * std::abs(state.prev().obj());
993+
= state.wolfe_status.accept_
994+
&& std::abs(state.curr().obj() - state.prev().obj())
995+
< options.tolerance;
1000996
finish_update = objective_converged || search_failed;
1001997
}
1002998
if (finish_update) {
1003999
if (!state.final_loop && state.wolfe_status.accept_) {
10041000
// Do one final loop with exact wolfe conditions
10051001
state.final_loop = true;
1006-
// NOTE: Swapping here so we need to swap prev and curr later
10071002
state.update_next_step(options);
10081003
continue;
10091004
}

stan/math/mix/functor/wolfe_line_search.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ struct WolfeData {
461461
a_.swap(other.a_);
462462
eval_ = other.eval_;
463463
}
464+
/** @brief Exchange all state with @p other in O(1), losing nothing.
 *
 * Swaps the theta_, theta_grad_, and a_ buffers and also exchanges the
 * cached evaluations via std::swap. Unlike the update() overloads below,
 * which copy/overwrite eval_ from @p other, this is a true two-way swap:
 * both objects keep a valid evaluation afterwards.
 */
void swap(WolfeData& other) {
  theta_.swap(other.theta_);
  theta_grad_.swap(other.theta_grad_);
  a_.swap(other.a_);
  std::swap(eval_, other.eval_);
}
464470
void update(WolfeData& other, const Eval& eval) {
465471
theta_.swap(other.theta_);
466472
a_.swap(other.a_);
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#include <gtest/gtest.h>
2+
#include <stan/math.hpp>
3+
#include <stan/math/mix.hpp>
4+
5+
#include <cmath>
6+
#include <sstream>
7+
#include <tuple>
8+
9+
namespace stan::math {
10+
namespace {
11+
12+
struct IdentityCovariance {
13+
template <typename Stream>
14+
Eigen::MatrixXd operator()(Stream* /*msgs*/) const {
15+
return Eigen::MatrixXd::Identity(1, 1);
16+
}
17+
};
18+
19+
struct QuarticLikelihood {
20+
template <typename Theta>
21+
auto operator()(const Theta& theta, std::ostream* /*msgs*/) const {
22+
const auto& x = theta(0);
23+
const auto x_sq = stan::math::square(x);
24+
return 2.0 * x - 0.5 * x_sq - 0.5 * stan::math::square(x_sq);
25+
}
26+
};
27+
28+
struct TinyQuarticLikelihood {
29+
template <typename Theta>
30+
auto operator()(const Theta& theta, std::ostream* /*msgs*/) const {
31+
return 1e-8 * QuarticLikelihood{}(theta, nullptr);
32+
}
33+
};
34+
35+
struct StubNewtonSolver {
36+
double proposal_a;
37+
38+
template <typename NewtonStateT, typename LLFun, typename LLTupleArgs,
39+
typename CovarMat>
40+
void solve_step(NewtonStateT& state, const LLFun& /*ll_fun*/,
41+
const LLTupleArgs& /*ll_args*/,
42+
const CovarMat& /*covariance*/, int /*hessian_block_size*/,
43+
std::ostream* /*msgs*/) const {
44+
state.proposal_step().a()(0) = proposal_a;
45+
}
46+
47+
double compute_log_determinant() const { return 0.0; }
48+
49+
template <typename NewtonStateT>
50+
double build_result(NewtonStateT& state, double /*log_det*/) const {
51+
return state.prev().a()(0);
52+
}
53+
};
54+
55+
template <typename Likelihood>
56+
double run_laplace(const Likelihood& ll_fun, double theta0_value,
57+
double tolerance, int max_num_steps,
58+
int max_steps_line_search, std::ostream* msgs) {
59+
Eigen::VectorXd theta0(1);
60+
theta0 << theta0_value;
61+
return stan::math::laplace_marginal_tol<false>(
62+
ll_fun, std::tuple<>{}, 1, IdentityCovariance{}, std::tuple<>{},
63+
std::make_tuple(theta0, tolerance, max_num_steps, 1,
64+
max_steps_line_search, true),
65+
msgs);
66+
}
67+
68+
// The no-line-search path (max_steps_line_search == 0) and the Wolfe
// line-search path must converge to the same marginal density.
TEST(LaplaceMarginalDensityEstimator, PublicLineSearchMatchesDirectStep) {
  std::ostringstream direct_log;
  std::ostringstream wolfe_log;

  const double direct
      = run_laplace(QuarticLikelihood{}, 2.0, 1e-12, 50, 0, &direct_log);
  const double searched
      = run_laplace(QuarticLikelihood{}, 2.0, 1e-12, 50, 1000, &wolfe_log);

  EXPECT_TRUE(std::isfinite(direct));
  EXPECT_TRUE(std::isfinite(searched));
  EXPECT_NEAR(direct, searched, 1e-8);
}
81+
82+
// With a likelihood scaled to ~1e-8, objective changes are tiny, so the
// absolute tolerance should stop the loop before the 6-step cap — the
// diagnostics stream must not mention hitting the iteration limit.
TEST(LaplaceMarginalDensityEstimator, AbsoluteObjectiveToleranceStopsNearZero) {
  std::ostringstream solver_log;

  const double density
      = run_laplace(TinyQuarticLikelihood{}, 0.0, 1e-8, 6, 1000, &solver_log);

  EXPECT_TRUE(std::isfinite(density));
  EXPECT_EQ(solver_log.str().find("max number of iterations"),
            std::string::npos);
}
91+
92+
// Regression test: when the update functor rejects the cached proposal
// (returns false) and reports alpha below min_alpha, the Newton loop must
// record WolfeReturn::StepTooSmall and must NOT resurrect the proposal via
// the Armijo fallback branch.
TEST(LaplaceMarginalDensityEstimator,
     InvalidCachedProposalDoesNotTriggerArmijoFallback) {
  Eigen::MatrixXd covariance = Eigen::MatrixXd::Identity(1, 1);
  Eigen::VectorXd theta0 = Eigen::VectorXd::Zero(1);
  // Constant objective and zero gradient: no step can genuinely improve,
  // so any accepted step here would indicate a bug in the loop.
  auto obj_fun = [](const auto& /*a*/, const auto& /*theta*/) {
    return -1.0;
  };
  auto theta_grad_f = [](const auto& theta) {
    return Eigen::VectorXd::Zero(theta.size());
  };
  internal::NewtonState state(1, obj_fun, theta_grad_f, covariance, theta0);
  laplace_options_base options;
  options.hessian_block_size = 1;
  options.max_num_steps = 1;
  options.tolerance = 1e-12;
  options.line_search.max_iterations = 5;
  options.line_search.min_alpha = 1e-8;

  // The stub always proposes a = 5.0, far from the zero start point.
  StubNewtonSolver solver{5.0};
  Eigen::Index step_iter = 1;
  // Simulates an invalid proposal evaluation: returns false and sets alpha
  // to half of min_alpha, so the loop's cached-proposal validity check
  // must fail.
  auto failing_update = [min_alpha = options.line_search.min_alpha](
                            auto& /*proposal*/, auto&& /*curr*/,
                            auto&& /*prev*/, auto& eval_in, auto&& /*p*/) {
    eval_in.alpha() = 0.5 * min_alpha;
    return false;
  };
  auto unused_ll = [](const auto& /*theta*/, std::ostream* /*msgs*/) {
    return 0.0;
  };

  const double result
      = internal::run_newton_loop(solver, state, options, step_iter, unused_ll,
                                  std::tuple<>{}, covariance, failing_update,
                                  nullptr);

  // StubNewtonSolver::build_result returns prev().a()(0), which must still
  // be the zero starting value since no step was accepted.
  EXPECT_DOUBLE_EQ(result, 0.0);
  EXPECT_FALSE(state.wolfe_status.accept_);
  EXPECT_EQ(state.wolfe_status.stop_, internal::WolfeReturn::StepTooSmall);
}
131+
132+
} // namespace
133+
} // namespace stan::math

0 commit comments

Comments
 (0)