Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix outbound payments memory leak on buggy router #3531

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lightning/src/ln/channelmanager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14783,6 +14783,7 @@ mod tests {
},
_ => panic!("unexpected error")
}
assert!(nodes[0].node.list_recent_payments().is_empty());
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions lightning/src/ln/functional_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6235,6 +6235,7 @@ fn test_payment_route_reaching_same_channel_twice() {
RecipientOnionFields::secret_only(our_payment_secret), PaymentId(our_payment_hash.0)
), false, APIError::InvalidRoute { ref err },
assert_eq!(err, &"Path went through the same channel twice"));
assert!(nodes[0].node.list_recent_payments().is_empty());
}

// BOLT 2 Requirements for the Sender when constructing and sending an update_add_htlc message.
Expand Down
107 changes: 68 additions & 39 deletions lightning/src/ln/outbound_payment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ impl OutboundPayments {
{
let onion_session_privs = self.add_new_pending_payment(payment_hash, recipient_onion.clone(), payment_id, None, route, None, None, entropy_source, best_block_height)?;
self.pay_route_internal(route, payment_hash, &recipient_onion, None, None, payment_id, None,
onion_session_privs, node_signer, best_block_height, &send_payment_along_path)
&onion_session_privs, node_signer, best_block_height, &send_payment_along_path)
.map_err(|e| { self.remove_outbound_if_all_failed(payment_id, &e); e })
}

Expand Down Expand Up @@ -983,7 +983,7 @@ impl OutboundPayments {

let result = self.pay_route_internal(
&route, payment_hash, &recipient_onion, keysend_preimage, invoice_request, payment_id,
Some(route_params.final_value_msat), onion_session_privs, node_signer, best_block_height,
Some(route_params.final_value_msat), &onion_session_privs, node_signer, best_block_height,
&send_payment_along_path
);
log_info!(
Expand All @@ -992,9 +992,9 @@ impl OutboundPayments {
);
if let Err(e) = result {
self.handle_pay_route_err(
e, payment_id, payment_hash, route, route_params, router, first_hops,
&inflight_htlcs, entropy_source, node_signer, best_block_height, logger,
pending_events, &send_payment_along_path
e, payment_id, payment_hash, route, route_params, onion_session_privs, router, first_hops,
&inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events,
&send_payment_along_path
);
}
Ok(())
Expand Down Expand Up @@ -1269,12 +1269,16 @@ impl OutboundPayments {
})?;

let res = self.pay_route_internal(&route, payment_hash, &recipient_onion,
keysend_preimage, None, payment_id, None, onion_session_privs, node_signer,
keysend_preimage, None, payment_id, None, &onion_session_privs, node_signer,
best_block_height, &send_payment_along_path);
log_info!(logger, "Sending payment with id {} and hash {} returned {:?}",
payment_id, payment_hash, res);
if let Err(e) = res {
self.handle_pay_route_err(e, payment_id, payment_hash, route, route_params, router, first_hops, &inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events, &send_payment_along_path);
self.handle_pay_route_err(
e, payment_id, payment_hash, route, route_params, onion_session_privs, router, first_hops,
&inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events,
&send_payment_along_path
);
}
Ok(())
}
Expand Down Expand Up @@ -1426,19 +1430,25 @@ impl OutboundPayments {
}
};
let res = self.pay_route_internal(&route, payment_hash, &recipient_onion, keysend_preimage,
invoice_request.as_ref(), payment_id, Some(total_msat), onion_session_privs, node_signer,
invoice_request.as_ref(), payment_id, Some(total_msat), &onion_session_privs, node_signer,
best_block_height, &send_payment_along_path);
log_info!(logger, "Result retrying payment id {}: {:?}", &payment_id, res);
if let Err(e) = res {
self.handle_pay_route_err(e, payment_id, payment_hash, route, route_params, router, first_hops, inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events, send_payment_along_path);
self.handle_pay_route_err(
e, payment_id, payment_hash, route, route_params, onion_session_privs, router, first_hops,
inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events,
send_payment_along_path
);
}
}

fn handle_pay_route_err<R: Deref, NS: Deref, ES: Deref, IH, SP, L: Deref>(
&self, err: PaymentSendFailure, payment_id: PaymentId, payment_hash: PaymentHash, route: Route,
mut route_params: RouteParameters, router: &R, first_hops: Vec<ChannelDetails>,
inflight_htlcs: &IH, entropy_source: &ES, node_signer: &NS, best_block_height: u32, logger: &L,
pending_events: &Mutex<VecDeque<(events::Event, Option<EventCompletionAction>)>>, send_payment_along_path: &SP,
mut route_params: RouteParameters, onion_session_privs: Vec<[u8; 32]>, router: &R,
first_hops: Vec<ChannelDetails>, inflight_htlcs: &IH, entropy_source: &ES, node_signer: &NS,
best_block_height: u32, logger: &L,
pending_events: &Mutex<VecDeque<(events::Event, Option<EventCompletionAction>)>>,
send_payment_along_path: &SP,
)
where
R::Target: Router,
Expand All @@ -1450,10 +1460,24 @@ impl OutboundPayments {
{
match err {
PaymentSendFailure::AllFailedResendSafe(errs) => {
self.remove_session_privs(payment_id, route.paths.iter().zip(onion_session_privs.iter()));
Self::push_path_failed_evs_and_scids(payment_id, payment_hash, &mut route_params, route.paths, errs.into_iter().map(|e| Err(e)), logger, pending_events);
self.find_route_and_send_payment(payment_hash, payment_id, route_params, router, first_hops, inflight_htlcs, entropy_source, node_signer, best_block_height, logger, pending_events, send_payment_along_path);
},
PaymentSendFailure::PartialFailure { failed_paths_retry: Some(mut retry), results, .. } => {
debug_assert_eq!(results.len(), route.paths.len());
debug_assert_eq!(results.len(), onion_session_privs.len());
let failed_paths = results.iter().zip(route.paths.iter().zip(onion_session_privs.iter()))
.filter_map(|(path_res, (path, session_priv))| {
match path_res {
// While a MonitorUpdateInProgress is an Err(_), the payment is still
// considered "in flight" and we shouldn't remove it from the
// PendingOutboundPayment set.
Ok(_) | Err(APIError::MonitorUpdateInProgress) => None,
Copy link
Contributor Author

@valentinewallace valentinewallace Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like unfortunately all tests pass with this diff:

diff --git a/lightning/src/ln/outbound_payment.rs b/lightning/src/ln/outbound_payment.rs
index 68d4125a3..430fb166c 100644
--- a/lightning/src/ln/outbound_payment.rs
+++ b/lightning/src/ln/outbound_payment.rs
@@ -1454,7 +1454,8 @@ impl OutboundPayments {
                                                        // While a MonitorUpdateInProgress is an Err(_), the payment is still
                                                        // considered "in flight" and we shouldn't remove it from the
                                                        // PendingOutboundPayment set.
-                                                       Ok(_) | Err(APIError::MonitorUpdateInProgress) => None,
+                                                       Ok(_) => None,
                                                        _ => Some((path, session_priv))
                                                }
                                        });

Even in the follow-up #3534 when rebased on this PR. Seems pre-existing though.

_ => Some((path, session_priv))
}
});
self.remove_session_privs(payment_id, failed_paths);
Self::push_path_failed_evs_and_scids(payment_id, payment_hash, &mut retry, route.paths, results.into_iter(), logger, pending_events);
// Some paths were sent, even if we failed to send the full MPP value our recipient may
// misbehave and claim the funds, at which point we have to consider the payment sent, so
Expand All @@ -1467,11 +1491,13 @@ impl OutboundPayments {
},
PaymentSendFailure::PathParameterError(results) => {
log_error!(logger, "Failed to send to route due to parameter error in a single path. Your router is buggy");
self.remove_session_privs(payment_id, route.paths.iter().zip(onion_session_privs.iter()));
Self::push_path_failed_evs_and_scids(payment_id, payment_hash, &mut route_params, route.paths, results.into_iter(), logger, pending_events);
self.abandon_payment(payment_id, PaymentFailureReason::UnexpectedError, pending_events);
},
PaymentSendFailure::ParameterError(e) => {
log_error!(logger, "Failed to send to route due to parameter error: {:?}. Your router is buggy", e);
self.remove_session_privs(payment_id, route.paths.iter().zip(onion_session_privs.iter()));
self.abandon_payment(payment_id, PaymentFailureReason::UnexpectedError, pending_events);
},
PaymentSendFailure::DuplicatePayment => debug_assert!(false), // unreachable
Expand Down Expand Up @@ -1511,6 +1537,21 @@ impl OutboundPayments {
}
}

// If a payment fails after adding the pending payment but before any HTLCs are locked into
// channels, we need to clear the session_privs in order for abandoning the payment to succeed.
fn remove_session_privs<'a, I: Iterator<Item = (&'a Path, &'a [u8; 32])>>(
&self, payment_id: PaymentId, path_session_priv: I
) {
if let Some(payment) = self.pending_outbound_payments.lock().unwrap().get_mut(&payment_id) {
for (path, session_priv_bytes) in path_session_priv {
let removed = payment.remove(session_priv_bytes, Some(path));
debug_assert!(removed, "This can't happen as the payment has an entry for this path added by callers");
}
} else {
debug_assert!(false, "This can't happen as the payment was added by callers");
}
}

pub(super) fn send_probe<ES: Deref, NS: Deref, F>(
&self, path: Path, probing_cookie_secret: [u8; 32], entropy_source: &ES, node_signer: &NS,
best_block_height: u32, send_payment_along_path: F
Expand Down Expand Up @@ -1542,7 +1583,7 @@ impl OutboundPayments {

let recipient_onion_fields = RecipientOnionFields::spontaneous_empty();
match self.pay_route_internal(&route, payment_hash, &recipient_onion_fields,
None, None, payment_id, None, onion_session_privs, node_signer, best_block_height,
None, None, payment_id, None, &onion_session_privs, node_signer, best_block_height,
&send_payment_along_path
) {
Ok(()) => Ok((payment_hash, payment_id)),
Expand Down Expand Up @@ -1733,7 +1774,7 @@ impl OutboundPayments {
fn pay_route_internal<NS: Deref, F>(
&self, route: &Route, payment_hash: PaymentHash, recipient_onion: &RecipientOnionFields,
keysend_preimage: Option<PaymentPreimage>, invoice_request: Option<&InvoiceRequest>,
payment_id: PaymentId, recv_value_msat: Option<u64>, onion_session_privs: Vec<[u8; 32]>,
payment_id: PaymentId, recv_value_msat: Option<u64>, onion_session_privs: &Vec<[u8; 32]>,
node_signer: &NS, best_block_height: u32, send_payment_along_path: &F
) -> Result<(), PaymentSendFailure>
where
Expand Down Expand Up @@ -1784,30 +1825,12 @@ impl OutboundPayments {
let cur_height = best_block_height + 1;
let mut results = Vec::new();
debug_assert_eq!(route.paths.len(), onion_session_privs.len());
for (path, session_priv_bytes) in route.paths.iter().zip(onion_session_privs.into_iter()) {
let mut path_res = send_payment_along_path(SendAlongPathArgs {
for (path, session_priv_bytes) in route.paths.iter().zip(onion_session_privs.iter()) {
let path_res = send_payment_along_path(SendAlongPathArgs {
path: &path, payment_hash: &payment_hash, recipient_onion, total_value,
cur_height, payment_id, keysend_preimage: &keysend_preimage, invoice_request,
session_priv_bytes
session_priv_bytes: *session_priv_bytes
});
match path_res {
Ok(_) => {},
Err(APIError::MonitorUpdateInProgress) => {
// While a MonitorUpdateInProgress is an Err(_), the payment is still
// considered "in flight" and we shouldn't remove it from the
// PendingOutboundPayment set.
},
Err(_) => {
let mut pending_outbounds = self.pending_outbound_payments.lock().unwrap();
if let Some(payment) = pending_outbounds.get_mut(&payment_id) {
let removed = payment.remove(&session_priv_bytes, Some(path));
debug_assert!(removed, "This can't happen as the payment has an entry for this path added by callers");
} else {
debug_assert!(false, "This can't happen as the payment was added by callers");
path_res = Err(APIError::APIMisuseError { err: "Internal error: payment disappeared during processing. Please report this bug!".to_owned() });
}
}
}
results.push(path_res);
}
let mut has_ok = false;
Expand Down Expand Up @@ -1872,17 +1895,23 @@ impl OutboundPayments {
F: Fn(SendAlongPathArgs) -> Result<(), APIError>,
{
self.pay_route_internal(route, payment_hash, &recipient_onion,
keysend_preimage, None, payment_id, recv_value_msat, onion_session_privs,
keysend_preimage, None, payment_id, recv_value_msat, &onion_session_privs,
node_signer, best_block_height, &send_payment_along_path)
.map_err(|e| { self.remove_outbound_if_all_failed(payment_id, &e); e })
}

// If we failed to send any paths, remove the new PaymentId from the `pending_outbound_payments`
// map as the payment is free to be resent.
fn remove_outbound_if_all_failed(&self, payment_id: PaymentId, err: &PaymentSendFailure) {
if let &PaymentSendFailure::AllFailedResendSafe(_) = err {
let removed = self.pending_outbound_payments.lock().unwrap().remove(&payment_id).is_some();
debug_assert!(removed, "We should always have a pending payment to remove here");
match err {
PaymentSendFailure::AllFailedResendSafe(_)
| PaymentSendFailure::ParameterError(_)
| PaymentSendFailure::PathParameterError(_) =>
{
let removed = self.pending_outbound_payments.lock().unwrap().remove(&payment_id).is_some();
debug_assert!(removed, "We should always have a pending payment to remove here");
},
PaymentSendFailure::DuplicatePayment | PaymentSendFailure::PartialFailure { .. } => {}
}
}

Expand Down
77 changes: 76 additions & 1 deletion lightning/src/ln/payment_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::types::payment::{PaymentHash, PaymentSecret, PaymentPreimage};
use crate::ln::chan_utils;
use crate::ln::msgs::ChannelMessageHandler;
use crate::ln::onion_utils;
use crate::ln::outbound_payment::{IDEMPOTENCY_TIMEOUT_TICKS, Retry, RetryableSendFailure};
use crate::ln::outbound_payment::{IDEMPOTENCY_TIMEOUT_TICKS, ProbeSendFailure, Retry, RetryableSendFailure};
use crate::routing::gossip::{EffectiveCapacity, RoutingFees};
use crate::routing::router::{get_route, Path, PaymentParameters, Route, Router, RouteHint, RouteHintHop, RouteHop, RouteParameters};
use crate::routing::scoring::ChannelUsage;
Expand Down Expand Up @@ -1249,6 +1249,7 @@ fn sent_probe_is_probe_of_sending_node() {
// First check we refuse to build a single-hop probe
let (route, _, _, _) = get_route_and_payment_hash!(&nodes[0], nodes[1], 100_000);
assert!(nodes[0].node.send_probe(route.paths[0].clone()).is_err());
assert!(nodes[0].node.list_recent_payments().is_empty());

// Then build an actual two-hop probing path
let (route, _, _, _) = get_route_and_payment_hash!(&nodes[0], nodes[2], 100_000);
Expand Down Expand Up @@ -4375,3 +4376,77 @@ fn test_non_strict_forwarding() {
let events = nodes[0].node.get_and_clear_pending_events();
expect_payment_failed_conditions_event(events, payment_hash, false, PaymentFailedConditions::new().blamed_scid(routed_scid));
}

#[test]
fn remove_pending_outbounds_on_buggy_router() {
// Ensure that if a payment errors due to a bogus route, we'll abandon the payment and remove the
// pending outbound from storage.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
create_announced_chan_between_nodes(&nodes, 0, 1);

let amt_msat = 10_000;
let payment_id = PaymentId([42; 32]);
let payment_params = PaymentParameters::from_node_id(nodes[1].node.get_our_node_id(), 0)
.with_bolt11_features(nodes[1].node.bolt11_invoice_features()).unwrap();
let (mut route, payment_hash, _, payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], payment_params, amt_msat);

// Extend the path by itself, essentially simulating route going through same channel twice
let cloned_hops = route.paths[0].hops.clone();
route.paths[0].hops.extend_from_slice(&cloned_hops);
let route_params = route.route_params.clone().unwrap();
nodes[0].router.expect_find_route(route_params.clone(), Ok(route.clone()));

nodes[0].node.send_payment(
payment_hash, RecipientOnionFields::secret_only(payment_secret), payment_id, route_params,
Retry::Attempts(1) // Even though another attempt is allowed, the payment should fail
).unwrap();
let events = nodes[0].node.get_and_clear_pending_events();
assert_eq!(events.len(), 2);
match &events[0] {
Event::PaymentPathFailed { failure, payment_failed_permanently, .. } => {
assert_eq!(failure, &PathFailure::InitialSend {
err: APIError::InvalidRoute { err: "Path went through the same channel twice".to_string() }
});
assert!(!payment_failed_permanently);
},
_ => panic!()
}
match events[1] {
Event::PaymentFailed { reason, .. } => {
assert_eq!(reason.unwrap(), PaymentFailureReason::UnexpectedError);
},
_ => panic!()
}
assert!(nodes[0].node.list_recent_payments().is_empty());
}

#[test]
fn remove_pending_outbound_probe_on_buggy_path() {
// Ensure that if a probe errors due to a bogus route, we'll return an error and remove the
// pending outbound from storage.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
create_announced_chan_between_nodes(&nodes, 0, 1);

let amt_msat = 10_000;
let payment_params = PaymentParameters::from_node_id(nodes[1].node.get_our_node_id(), 0)
.with_bolt11_features(nodes[1].node.bolt11_invoice_features()).unwrap();
let (mut route, _, _, _) = get_route_and_payment_hash!(nodes[0], nodes[1], payment_params, amt_msat);

// Extend the path by itself, essentially simulating route going through same channel twice
let cloned_hops = route.paths[0].hops.clone();
route.paths[0].hops.extend_from_slice(&cloned_hops);

assert_eq!(
nodes[0].node.send_probe(route.paths.pop().unwrap()).unwrap_err(),
ProbeSendFailure::ParameterError(
APIError::InvalidRoute { err: "Path went through the same channel twice".to_string() }
)
);
assert!(nodes[0].node.list_recent_payments().is_empty());
}
4 changes: 4 additions & 0 deletions pending_changelog/3531-buggy-router-leak.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## Bug Fixes

* Fixed a rare case where a custom router returning a buggy route could result in holding onto a
pending payment forever and in some cases failing to generate a PaymentFailed event (#3531).
Loading