Skip to content

Commit 2f7a61b

Browse files
committed
Avoid disconnect on message timeout while waiting on monitor/signer
When sending/receiving `commitment_signed`/`revoke_and_ack`, we may expect the counterparty to follow up with one of their own in response. In some cases, they are not allowed to send it because they are actually waiting for one from us. Async monitor updates and signing requests may result in the message we need to send to the counterparty being delayed, and our disconnect logic previously did not consider that. It doesn't make sense to disconnect our counterparty when we're the ones seemingly blocking progress. This commit ensures we no longer disconnect when we're waiting on an async monitor update or signing request, unless we're negotiating quiescence. Note that while our counterparty is still able to enforce a similar disconnect logic on their side, as they have no insight into why we're not able to make progress, this commit at least helps prevent reconnection cycles with those that don't enforce one.
1 parent c4d23bc commit 2f7a61b

File tree

2 files changed

+164
-8
lines changed

2 files changed

+164
-8
lines changed

lightning/src/ln/async_signer_tests.rs

+138
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::chain::ChannelMonitorUpdateStatus;
2121
use crate::events::bump_transaction::WalletSource;
2222
use crate::events::{ClosureReason, Event};
2323
use crate::ln::chan_utils::ClosingTransaction;
24+
use crate::ln::channel::DISCONNECT_PEER_AWAITING_RESPONSE_TICKS;
2425
use crate::ln::channel_state::{ChannelDetails, ChannelShutdownState};
2526
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
2627
use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
@@ -1091,3 +1092,140 @@ fn do_test_closing_signed(extra_closing_signed: bool, reconnect: bool) {
10911092
check_closed_event!(nodes[0], 1, ClosureReason::LocallyInitiatedCooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
10921093
check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyInitiatedCooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
10931094
}
1095+
1096+
#[test]
1097+
fn test_no_disconnect_while_async_revoke_and_ack_expecting_remote_commitment_signed() {
1098+
// Nodes with async signers may be expecting to receive a `commitment_signed` from the
1099+
// counterparty even if a `revoke_and_ack` has yet to be sent due to an async signer. Test that
1100+
// we don't disconnect the async signer node due to not receiving the `commitment_signed` within
1101+
// the timeout while the `revoke_and_ack` is not ready.
1102+
let chanmon_cfgs = create_chanmon_cfgs(2);
1103+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1104+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1105+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1106+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1107+
1108+
let node_id_0 = nodes[0].node.get_our_node_id();
1109+
let node_id_1 = nodes[1].node.get_our_node_id();
1110+
1111+
let payment_amount = 1_000_000;
1112+
send_payment(&nodes[0], &[&nodes[1]], payment_amount * 4);
1113+
1114+
nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::ReleaseCommitmentSecret);
1115+
1116+
// We'll send a payment from both nodes to each other.
1117+
let (route1, payment_hash1, _, payment_secret1) =
1118+
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
1119+
let onion1 = RecipientOnionFields::secret_only(payment_secret1);
1120+
let payment_id1 = PaymentId(payment_hash1.0);
1121+
nodes[0].node.send_payment_with_route(route1, payment_hash1, onion1, payment_id1).unwrap();
1122+
check_added_monitors(&nodes[0], 1);
1123+
1124+
let (route2, payment_hash2, _, payment_secret2) =
1125+
get_route_and_payment_hash!(&nodes[1], &nodes[0], payment_amount);
1126+
let onion2 = RecipientOnionFields::secret_only(payment_secret2);
1127+
let payment_id2 = PaymentId(payment_hash2.0);
1128+
nodes[1].node.send_payment_with_route(route2, payment_hash2, onion2, payment_id2).unwrap();
1129+
check_added_monitors(&nodes[1], 1);
1130+
1131+
let update = get_htlc_update_msgs!(&nodes[0], node_id_1);
1132+
nodes[1].node.handle_update_add_htlc(node_id_0, &update.update_add_htlcs[0]);
1133+
nodes[1].node.handle_commitment_signed_batch_test(node_id_0, &update.commitment_signed);
1134+
check_added_monitors(&nodes[1], 1);
1135+
1136+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1137+
nodes[0].node.handle_update_add_htlc(node_id_1, &update.update_add_htlcs[0]);
1138+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1139+
check_added_monitors(&nodes[0], 1);
1140+
1141+
// nodes[0] can only respond with a `revoke_and_ack`. The `commitment_signed` that would follow
1142+
// is blocked on receiving a counterparty `revoke_and_ack`, which nodes[1] is still pending on.
1143+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1144+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1145+
check_added_monitors(&nodes[1], 1);
1146+
1147+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1148+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1149+
1150+
// nodes[0] will disconnect the counterparty as it's waiting on a `revoke_and_ack`.
1151+
// nodes[1] is waiting on a `commitment_signed`, but since it hasn't yet sent its own
1152+
// `revoke_and_ack`, it shouldn't disconnect yet.
1153+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1154+
nodes[0].node.timer_tick_occurred();
1155+
nodes[1].node.timer_tick_occurred();
1156+
}
1157+
let has_disconnect_event = |event| {
1158+
if let MessageSendEvent::HandleError { action, .. } = event {
1159+
if let msgs::ErrorAction::DisconnectPeerWithWarning { .. } = action {
1160+
true
1161+
} else {
1162+
false
1163+
}
1164+
} else {
1165+
false
1166+
}
1167+
};
1168+
assert!(nodes[0].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1169+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1170+
}
1171+
1172+
#[test]
1173+
fn test_no_disconnect_while_async_commitment_signed_expecting_remote_revoke_and_ack() {
1174+
// Nodes with async signers may be expecting to receive a `revoke_and_ack` from the
1175+
// counterparty even if a `commitment_signed` has yet to be sent due to an async signer. Test
1176+
// that we don't disconnect the async signer node due to not receiving the `revoke_and_ack`
1177+
// within the timeout while the `commitment_signed` is not ready.
1178+
let chanmon_cfgs = create_chanmon_cfgs(2);
1179+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1180+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1181+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1182+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1183+
1184+
let node_id_0 = nodes[0].node.get_our_node_id();
1185+
let node_id_1 = nodes[1].node.get_our_node_id();
1186+
1187+
// Route a payment and attempt to claim it.
1188+
let payment_amount = 1_000_000;
1189+
let (preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], payment_amount);
1190+
nodes[1].node.claim_funds(preimage);
1191+
check_added_monitors(&nodes[1], 1);
1192+
1193+
// We'll disable signing counterparty commitments on the payment sender.
1194+
nodes[0].disable_channel_signer_op(&node_id_1, &chan_id, SignerOp::SignCounterpartyCommitment);
1195+
1196+
// After processing the `update_fulfill`, they'll only be able to send `revoke_and_ack` until
1197+
// the `commitment_signed` is no longer pending.
1198+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1199+
nodes[0].node.handle_update_fulfill_htlc(node_id_1, &update.update_fulfill_htlcs[0]);
1200+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1201+
check_added_monitors(&nodes[0], 1);
1202+
1203+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1204+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1205+
check_added_monitors(&nodes[1], 1);
1206+
1207+
// The payment sender shouldn't disconnect the counterparty due to a missing `revoke_and_ack`
1208+
// because the `commitment_signed` isn't ready yet. The payment recipient may disconnect the
1209+
// sender because it doesn't have an async signer and it's expecting a timely `revoke_and_ack`
1210+
// response.
1211+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1212+
nodes[0].node.timer_tick_occurred();
1213+
nodes[1].node.timer_tick_occurred();
1214+
}
1215+
let has_disconnect_event = |event| {
1216+
if let MessageSendEvent::HandleError { action, .. } = event {
1217+
if let msgs::ErrorAction::DisconnectPeerWithWarning { .. } = action {
1218+
true
1219+
} else {
1220+
false
1221+
}
1222+
} else {
1223+
false
1224+
}
1225+
};
1226+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1227+
assert!(nodes[1].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1228+
1229+
expect_payment_sent(&nodes[0], preimage, None, false, false);
1230+
expect_payment_claimed!(nodes[1], payment_hash, payment_amount);
1231+
}

lightning/src/ln/channel.rs

+26-8
Original file line numberDiff line numberDiff line change
@@ -3205,23 +3205,32 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
32053205
}
32063206
}
32073207

3208+
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
3209+
/// been sent by either side but not yet irrevocably committed on both commitments because we're
3210+
/// waiting on a pending monitor update or signer request.
3211+
pub fn is_monitor_or_signer_pending_channel_update(&self) -> bool {
3212+
self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
3213+
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
3214+
}
3215+
32083216
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
32093217
/// been sent by either side but not yet irrevocably committed on both commitments. Holding cell
32103218
/// updates are not considered because they haven't been sent to the peer yet.
32113219
///
32123220
/// This can be used to satisfy quiescence's requirement when sending `stfu`:
32133221
/// - MUST NOT send `stfu` if any of the sender's htlc additions, htlc removals
32143222
/// or fee updates are pending for either peer.
3223+
///
3224+
/// Note that it is still possible for an update to be pending that's not captured here due to a
3225+
/// pending monitor update or signer request. `is_monitor_or_signer_pending_channel_update`
3226+
/// should also be checked in such cases.
32153227
fn has_pending_channel_update(&self) -> bool {
32163228
// An update from the local/remote node may be pending on the remote/local commitment since
32173229
// they are not tracked within our state, so we rely on whether any `commitment_signed` or
32183230
// `revoke_and_ack` messages are owed.
32193231
//
32203232
// We check these flags first as they are more likely to be set.
3221-
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed
3222-
|| self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
3223-
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
3224-
{
3233+
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed {
32253234
return true;
32263235
}
32273236

@@ -7415,10 +7424,15 @@ impl<SP: Deref> FundedChannel<SP> where
74157424
self.context.channel_state.is_peer_disconnected()
74167425
// Cleared upon receiving `stfu`.
74177426
|| self.context.channel_state.is_local_stfu_sent()
7427+
// Cleared upon becoming quiescent.
7428+
|| self.context.channel_state.is_remote_stfu_sent()
74187429
// Cleared upon receiving a message that triggers the end of quiescence.
74197430
|| self.context.channel_state.is_quiescent()
7420-
// Cleared upon receiving `revoke_and_ack`.
7421-
|| self.context.has_pending_channel_update()
7431+
// Cleared upon receiving `revoke_and_ack`. Since we're not queiscent, as we just
7432+
// checked above, we intentionally don't disconnect our counterparty if we're waiting on
7433+
// a monitor update or signer request.
7434+
|| (self.context.has_pending_channel_update()
7435+
&& !self.context.is_monitor_or_signer_pending_channel_update())
74227436
{
74237437
// This is the first tick we've seen after expecting to make forward progress.
74247438
self.context.sent_message_awaiting_response = Some(1);
@@ -9306,7 +9320,9 @@ impl<SP: Deref> FundedChannel<SP> where
93069320
);
93079321
debug_assert!(self.context.is_live());
93089322

9309-
if self.context.has_pending_channel_update() {
9323+
if self.context.has_pending_channel_update()
9324+
|| self.context.is_monitor_or_signer_pending_channel_update()
9325+
{
93109326
return Err(ChannelError::Ignore(
93119327
"We cannot send `stfu` while state machine is pending".to_owned()
93129328
));
@@ -9394,7 +9410,9 @@ impl<SP: Deref> FundedChannel<SP> where
93949410
// We were expecting to receive `stfu` because we already sent ours.
93959411
self.mark_response_received();
93969412

9397-
if self.context.has_pending_channel_update() {
9413+
if self.context.has_pending_channel_update()
9414+
|| self.context.is_monitor_or_signer_pending_channel_update()
9415+
{
93989416
// Since we've already sent `stfu`, it should not be possible for one of our updates to
93999417
// be pending, so anything pending currently must be from a counterparty update.
94009418
return Err(ChannelError::WarnAndDisconnect(

0 commit comments

Comments
 (0)