Skip to content

Commit d8c8195

Browse files
committed
Avoid disconnect on message timeout while waiting on monitor/signer
When sending/receiving `commitment_signed`/`revoke_and_ack`, we may expect the counterparty to follow up with one of their own in response. In some cases, they are not allowed to send it because they are actually waiting for one from us. Async monitor updates and signing requests may result in the message we need to send to the counterparty being delayed, and our disconnect logic previously did not consider that. It doesn't make sense to disconnect our counterparty when we're the ones seemingly blocking progress. This commit ensures we no longer disconnect when we're waiting on an async monitor update or signing request, unless we're negotiating quiescence. Note that while our counterparty is still able to enforce a similar disconnect logic on their side, as they have no insight into why we're not able to make progress, this commit at least helps prevent reconnection cycles with those that don't enforce one.
1 parent c608592 commit d8c8195

File tree

3 files changed

+159
-8
lines changed

3 files changed

+159
-8
lines changed

lightning/src/ln/async_signer_tests.rs

+128
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::chain::ChannelMonitorUpdateStatus;
2121
use crate::events::bump_transaction::WalletSource;
2222
use crate::events::{ClosureReason, Event};
2323
use crate::ln::chan_utils::ClosingTransaction;
24+
use crate::ln::channel::DISCONNECT_PEER_AWAITING_RESPONSE_TICKS;
2425
use crate::ln::channel_state::{ChannelDetails, ChannelShutdownState};
2526
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
2627
use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
@@ -1091,3 +1092,130 @@ fn do_test_closing_signed(extra_closing_signed: bool, reconnect: bool) {
10911092
check_closed_event!(nodes[0], 1, ClosureReason::LocallyInitiatedCooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
10921093
check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyInitiatedCooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
10931094
}
1095+
1096+
#[test]
1097+
fn test_no_disconnect_while_async_revoke_and_ack_expecting_remote_commitment_signed() {
1098+
// Nodes with async signers may be expecting to receive a `commitment_signed` from the
1099+
// counterparty even if a `revoke_and_ack` has yet to be sent due to an async signer. Test that
1100+
// we don't disconnect the async signer node due to not receiving the `commitment_signed` within
1101+
// the timeout while the `revoke_and_ack` is not ready.
1102+
let chanmon_cfgs = create_chanmon_cfgs(2);
1103+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1104+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1105+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1106+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1107+
1108+
let node_id_0 = nodes[0].node.get_our_node_id();
1109+
let node_id_1 = nodes[1].node.get_our_node_id();
1110+
1111+
let payment_amount = 1_000_000;
1112+
send_payment(&nodes[0], &[&nodes[1]], payment_amount * 4);
1113+
1114+
nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::ReleaseCommitmentSecret);
1115+
1116+
// We'll send a payment from both nodes to each other.
1117+
let (route1, payment_hash1, _, payment_secret1) =
1118+
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
1119+
let onion1 = RecipientOnionFields::secret_only(payment_secret1);
1120+
let payment_id1 = PaymentId(payment_hash1.0);
1121+
nodes[0].node.send_payment_with_route(route1, payment_hash1, onion1, payment_id1).unwrap();
1122+
check_added_monitors(&nodes[0], 1);
1123+
1124+
let (route2, payment_hash2, _, payment_secret2) =
1125+
get_route_and_payment_hash!(&nodes[1], &nodes[0], payment_amount);
1126+
let onion2 = RecipientOnionFields::secret_only(payment_secret2);
1127+
let payment_id2 = PaymentId(payment_hash2.0);
1128+
nodes[1].node.send_payment_with_route(route2, payment_hash2, onion2, payment_id2).unwrap();
1129+
check_added_monitors(&nodes[1], 1);
1130+
1131+
let update = get_htlc_update_msgs!(&nodes[0], node_id_1);
1132+
nodes[1].node.handle_update_add_htlc(node_id_0, &update.update_add_htlcs[0]);
1133+
nodes[1].node.handle_commitment_signed_batch_test(node_id_0, &update.commitment_signed);
1134+
check_added_monitors(&nodes[1], 1);
1135+
1136+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1137+
nodes[0].node.handle_update_add_htlc(node_id_1, &update.update_add_htlcs[0]);
1138+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1139+
check_added_monitors(&nodes[0], 1);
1140+
1141+
// nodes[0] can only respond with a `revoke_and_ack`. The `commitment_signed` that would follow
1142+
// is blocked on receiving a counterparty `revoke_and_ack`, which nodes[1] is still pending on.
1143+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1144+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1145+
check_added_monitors(&nodes[1], 1);
1146+
1147+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1148+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1149+
1150+
// nodes[0] will disconnect the counterparty as it's waiting on a `revoke_and_ack`.
1151+
// nodes[1] is waiting on a `commitment_signed`, but since it hasn't yet sent its own
1152+
// `revoke_and_ack`, it shouldn't disconnect yet.
1153+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1154+
nodes[0].node.timer_tick_occurred();
1155+
nodes[1].node.timer_tick_occurred();
1156+
}
1157+
let has_disconnect_event = |event| {
1158+
matches!(
1159+
event, MessageSendEvent::HandleError { action , .. }
1160+
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
1161+
)
1162+
};
1163+
assert!(nodes[0].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1164+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1165+
}
1166+
1167+
#[test]
1168+
fn test_no_disconnect_while_async_commitment_signed_expecting_remote_revoke_and_ack() {
1169+
// Nodes with async signers may be expecting to receive a `revoke_and_ack` from the
1170+
// counterparty even if a `commitment_signed` has yet to be sent due to an async signer. Test
1171+
// that we don't disconnect the async signer node due to not receiving the `revoke_and_ack`
1172+
// within the timeout while the `commitment_signed` is not ready.
1173+
let chanmon_cfgs = create_chanmon_cfgs(2);
1174+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1175+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1176+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1177+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1178+
1179+
let node_id_0 = nodes[0].node.get_our_node_id();
1180+
let node_id_1 = nodes[1].node.get_our_node_id();
1181+
1182+
// Route a payment and attempt to claim it.
1183+
let payment_amount = 1_000_000;
1184+
let (preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], payment_amount);
1185+
nodes[1].node.claim_funds(preimage);
1186+
check_added_monitors(&nodes[1], 1);
1187+
1188+
// We'll disable signing counterparty commitments on the payment sender.
1189+
nodes[0].disable_channel_signer_op(&node_id_1, &chan_id, SignerOp::SignCounterpartyCommitment);
1190+
1191+
// After processing the `update_fulfill`, they'll only be able to send `revoke_and_ack` until
1192+
// the `commitment_signed` is no longer pending.
1193+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1194+
nodes[0].node.handle_update_fulfill_htlc(node_id_1, &update.update_fulfill_htlcs[0]);
1195+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1196+
check_added_monitors(&nodes[0], 1);
1197+
1198+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1199+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1200+
check_added_monitors(&nodes[1], 1);
1201+
1202+
// The payment sender shouldn't disconnect the counterparty due to a missing `revoke_and_ack`
1203+
// because the `commitment_signed` isn't ready yet. The payment recipient may disconnect the
1204+
// sender because it doesn't have an async signer and it's expecting a timely
1205+
// `commitment_signed` response.
1206+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1207+
nodes[0].node.timer_tick_occurred();
1208+
nodes[1].node.timer_tick_occurred();
1209+
}
1210+
let has_disconnect_event = |event| {
1211+
matches!(
1212+
event, MessageSendEvent::HandleError { action , .. }
1213+
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
1214+
)
1215+
};
1216+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1217+
assert!(nodes[1].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1218+
1219+
expect_payment_sent(&nodes[0], preimage, None, false, false);
1220+
expect_payment_claimed!(nodes[1], payment_hash, payment_amount);
1221+
}

lightning/src/ln/channel.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -3205,23 +3205,32 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
32053205
}
32063206
}
32073207

3208+
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
3209+
/// been sent by either side but not yet irrevocably committed on both commitments because we're
3210+
/// waiting on a pending monitor update or signer request.
3211+
pub fn is_monitor_or_signer_pending_channel_update(&self) -> bool {
3212+
self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
3213+
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
3214+
}
3215+
32083216
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
32093217
/// been sent by either side but not yet irrevocably committed on both commitments. Holding cell
32103218
/// updates are not considered because they haven't been sent to the peer yet.
32113219
///
32123220
/// This can be used to satisfy quiescence's requirement when sending `stfu`:
32133221
/// - MUST NOT send `stfu` if any of the sender's htlc additions, htlc removals
32143222
/// or fee updates are pending for either peer.
3223+
///
3224+
/// Note that it is still possible for an update to be pending that's not captured here due to a
3225+
/// pending monitor update or signer request. `is_monitor_or_signer_pending_channel_update`
3226+
/// should also be checked in such cases.
32153227
fn has_pending_channel_update(&self) -> bool {
32163228
// An update from the local/remote node may be pending on the remote/local commitment since
32173229
// they are not tracked within our state, so we rely on whether any `commitment_signed` or
32183230
// `revoke_and_ack` messages are owed.
32193231
//
32203232
// We check these flags first as they are more likely to be set.
3221-
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed
3222-
|| self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
3223-
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
3224-
{
3233+
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed {
32253234
return true;
32263235
}
32273236

@@ -7419,8 +7428,11 @@ impl<SP: Deref> FundedChannel<SP> where
74197428
|| self.context.channel_state.is_remote_stfu_sent()
74207429
// Cleared upon receiving a message that triggers the end of quiescence.
74217430
|| self.context.channel_state.is_quiescent()
7422-
// Cleared upon receiving `revoke_and_ack`.
7423-
|| self.context.has_pending_channel_update()
7431+
// Cleared upon receiving `revoke_and_ack`. Since we're not queiscent, as we just
7432+
// checked above, we intentionally don't disconnect our counterparty if we're waiting on
7433+
// a monitor update or signer request.
7434+
|| (self.context.has_pending_channel_update()
7435+
&& !self.context.is_monitor_or_signer_pending_channel_update())
74247436
{
74257437
// This is the first tick we've seen after expecting to make forward progress.
74267438
self.context.sent_message_awaiting_response = Some(1);
@@ -9308,7 +9320,9 @@ impl<SP: Deref> FundedChannel<SP> where
93089320
);
93099321
debug_assert!(self.context.is_live());
93109322

9311-
if self.context.has_pending_channel_update() {
9323+
if self.context.has_pending_channel_update()
9324+
|| self.context.is_monitor_or_signer_pending_channel_update()
9325+
{
93129326
return Err(ChannelError::Ignore(
93139327
"We cannot send `stfu` while state machine is pending".to_owned()
93149328
));
@@ -9396,7 +9410,9 @@ impl<SP: Deref> FundedChannel<SP> where
93969410
// We were expecting to receive `stfu` because we already sent ours.
93979411
self.mark_response_received();
93989412

9399-
if self.context.has_pending_channel_update() {
9413+
if self.context.has_pending_channel_update()
9414+
|| self.context.is_monitor_or_signer_pending_channel_update()
9415+
{
94009416
// Since we've already sent `stfu`, it should not be possible for one of our updates to
94019417
// be pending, so anything pending currently must be from a counterparty update.
94029418
return Err(ChannelError::WarnAndDisconnect(

lightning/src/ln/quiescence_tests.rs

+7
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,13 @@ fn test_quiescence_timeout_while_waiting_for_holder_stfu() {
482482
check_added_monitors(&nodes[0], 1);
483483
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
484484

485+
// Since nodes[0] has no context of the quiescence attempt yet, it should not disconnect the
486+
// counterparty just because it hasn't completed it's monitor update in a timely manner.
487+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
488+
nodes[0].node.timer_tick_occurred();
489+
}
490+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
491+
485492
nodes[0].node.handle_stfu(node_id_1, &stfu);
486493
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
487494

0 commit comments

Comments
 (0)