Skip to content

Commit 22d4b42

Browse files
authored
Merge pull request #3721 from wpaulino/no-disconnect-async-monitor-signer
Avoid disconnect on message timeout while waiting on monitor/signer
2 parents 22a18a8 + ede5154 commit 22d4b42

File tree

3 files changed

+174
-41
lines changed

3 files changed

+174
-41
lines changed

lightning/src/ln/async_signer_tests.rs

+128
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::chain::ChannelMonitorUpdateStatus;
2121
use crate::events::bump_transaction::WalletSource;
2222
use crate::events::{ClosureReason, Event};
2323
use crate::ln::chan_utils::ClosingTransaction;
24+
use crate::ln::channel::DISCONNECT_PEER_AWAITING_RESPONSE_TICKS;
2425
use crate::ln::channel_state::{ChannelDetails, ChannelShutdownState};
2526
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
2627
use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
@@ -1091,3 +1092,130 @@ fn do_test_closing_signed(extra_closing_signed: bool, reconnect: bool) {
10911092
check_closed_event!(nodes[0], 1, ClosureReason::LocallyInitiatedCooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
10921093
check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyInitiatedCooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
10931094
}
1095+
1096+
#[test]
1097+
fn test_no_disconnect_while_async_revoke_and_ack_expecting_remote_commitment_signed() {
1098+
// Nodes with async signers may be expecting to receive a `commitment_signed` from the
1099+
// counterparty even if a `revoke_and_ack` has yet to be sent due to an async signer. Test that
1100+
// we don't disconnect the async signer node due to not receiving the `commitment_signed` within
1101+
// the timeout while the `revoke_and_ack` is not ready.
1102+
let chanmon_cfgs = create_chanmon_cfgs(2);
1103+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1104+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1105+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1106+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1107+
1108+
let node_id_0 = nodes[0].node.get_our_node_id();
1109+
let node_id_1 = nodes[1].node.get_our_node_id();
1110+
1111+
let payment_amount = 1_000_000;
1112+
send_payment(&nodes[0], &[&nodes[1]], payment_amount * 4);
1113+
1114+
nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::ReleaseCommitmentSecret);
1115+
1116+
// We'll send a payment from both nodes to each other.
1117+
let (route1, payment_hash1, _, payment_secret1) =
1118+
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
1119+
let onion1 = RecipientOnionFields::secret_only(payment_secret1);
1120+
let payment_id1 = PaymentId(payment_hash1.0);
1121+
nodes[0].node.send_payment_with_route(route1, payment_hash1, onion1, payment_id1).unwrap();
1122+
check_added_monitors(&nodes[0], 1);
1123+
1124+
let (route2, payment_hash2, _, payment_secret2) =
1125+
get_route_and_payment_hash!(&nodes[1], &nodes[0], payment_amount);
1126+
let onion2 = RecipientOnionFields::secret_only(payment_secret2);
1127+
let payment_id2 = PaymentId(payment_hash2.0);
1128+
nodes[1].node.send_payment_with_route(route2, payment_hash2, onion2, payment_id2).unwrap();
1129+
check_added_monitors(&nodes[1], 1);
1130+
1131+
let update = get_htlc_update_msgs!(&nodes[0], node_id_1);
1132+
nodes[1].node.handle_update_add_htlc(node_id_0, &update.update_add_htlcs[0]);
1133+
nodes[1].node.handle_commitment_signed_batch_test(node_id_0, &update.commitment_signed);
1134+
check_added_monitors(&nodes[1], 1);
1135+
1136+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1137+
nodes[0].node.handle_update_add_htlc(node_id_1, &update.update_add_htlcs[0]);
1138+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1139+
check_added_monitors(&nodes[0], 1);
1140+
1141+
// nodes[0] can only respond with a `revoke_and_ack`. The `commitment_signed` that would follow
1142+
// is blocked on receiving a counterparty `revoke_and_ack`, which nodes[1] is still pending on.
1143+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1144+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1145+
check_added_monitors(&nodes[1], 1);
1146+
1147+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1148+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1149+
1150+
// nodes[0] will disconnect the counterparty as it's waiting on a `revoke_and_ack`.
1151+
// nodes[1] is waiting on a `commitment_signed`, but since it hasn't yet sent its own
1152+
// `revoke_and_ack`, it shouldn't disconnect yet.
1153+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1154+
nodes[0].node.timer_tick_occurred();
1155+
nodes[1].node.timer_tick_occurred();
1156+
}
1157+
let has_disconnect_event = |event| {
1158+
matches!(
1159+
event, MessageSendEvent::HandleError { action , .. }
1160+
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
1161+
)
1162+
};
1163+
assert!(nodes[0].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1164+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
1165+
}
1166+
1167+
#[test]
1168+
fn test_no_disconnect_while_async_commitment_signed_expecting_remote_revoke_and_ack() {
1169+
// Nodes with async signers may be expecting to receive a `revoke_and_ack` from the
1170+
// counterparty even if a `commitment_signed` has yet to be sent due to an async signer. Test
1171+
// that we don't disconnect the async signer node due to not receiving the `revoke_and_ack`
1172+
// within the timeout while the `commitment_signed` is not ready.
1173+
let chanmon_cfgs = create_chanmon_cfgs(2);
1174+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1175+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1176+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1177+
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
1178+
1179+
let node_id_0 = nodes[0].node.get_our_node_id();
1180+
let node_id_1 = nodes[1].node.get_our_node_id();
1181+
1182+
// Route a payment and attempt to claim it.
1183+
let payment_amount = 1_000_000;
1184+
let (preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], payment_amount);
1185+
nodes[1].node.claim_funds(preimage);
1186+
check_added_monitors(&nodes[1], 1);
1187+
1188+
// We'll disable signing counterparty commitments on the payment sender.
1189+
nodes[0].disable_channel_signer_op(&node_id_1, &chan_id, SignerOp::SignCounterpartyCommitment);
1190+
1191+
// After processing the `update_fulfill`, they'll only be able to send `revoke_and_ack` until
1192+
// the `commitment_signed` is no longer pending.
1193+
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
1194+
nodes[0].node.handle_update_fulfill_htlc(node_id_1, &update.update_fulfill_htlcs[0]);
1195+
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
1196+
check_added_monitors(&nodes[0], 1);
1197+
1198+
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
1199+
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
1200+
check_added_monitors(&nodes[1], 1);
1201+
1202+
// The payment sender shouldn't disconnect the counterparty due to a missing `revoke_and_ack`
1203+
// because the `commitment_signed` isn't ready yet. The payment recipient may disconnect the
1204+
// sender because it doesn't have an async signer and it's expecting a timely
1205+
// `commitment_signed` response.
1206+
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
1207+
nodes[0].node.timer_tick_occurred();
1208+
nodes[1].node.timer_tick_occurred();
1209+
}
1210+
let has_disconnect_event = |event| {
1211+
matches!(
1212+
event, MessageSendEvent::HandleError { action , .. }
1213+
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
1214+
)
1215+
};
1216+
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
1217+
assert!(nodes[1].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
1218+
1219+
expect_payment_sent(&nodes[0], preimage, None, false, false);
1220+
expect_payment_claimed!(nodes[1], payment_hash, payment_amount);
1221+
}

lightning/src/ln/channel.rs

+31-13
Original file line numberDiff line numberDiff line change
@@ -3302,23 +3302,33 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
33023302
}
33033303
}
33043304

3305+
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
3306+
/// been sent by either side but not yet irrevocably committed on both commitments because we're
3307+
/// waiting on a pending monitor update or signer request.
3308+
pub fn is_monitor_or_signer_pending_channel_update(&self) -> bool {
3309+
self.channel_state.is_monitor_update_in_progress()
3310+
|| self.signer_pending_revoke_and_ack
3311+
|| self.signer_pending_commitment_update
3312+
}
3313+
33053314
/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
33063315
/// been sent by either side but not yet irrevocably committed on both commitments. Holding cell
33073316
/// updates are not considered because they haven't been sent to the peer yet.
33083317
///
33093318
/// This can be used to satisfy quiescence's requirement when sending `stfu`:
33103319
/// - MUST NOT send `stfu` if any of the sender's htlc additions, htlc removals
33113320
/// or fee updates are pending for either peer.
3312-
fn has_pending_channel_update(&self) -> bool {
3321+
///
3322+
/// Note that it is still possible for an update to be pending that's not captured here due to a
3323+
/// pending monitor update or signer request. `is_monitor_or_signer_pending_channel_update`
3324+
/// should also be checked in such cases.
3325+
fn is_waiting_on_peer_pending_channel_update(&self) -> bool {
33133326
// An update from the local/remote node may be pending on the remote/local commitment since
33143327
// they are not tracked within our state, so we rely on whether any `commitment_signed` or
33153328
// `revoke_and_ack` messages are owed.
33163329
//
33173330
// We check these flags first as they are more likely to be set.
3318-
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed
3319-
|| self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
3320-
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
3321-
{
3331+
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed {
33223332
return true;
33233333
}
33243334

@@ -7513,8 +7523,11 @@ impl<SP: Deref> FundedChannel<SP> where
75137523
|| self.context.channel_state.is_local_stfu_sent()
75147524
// Cleared upon receiving a message that triggers the end of quiescence.
75157525
|| self.context.channel_state.is_quiescent()
7516-
// Cleared upon receiving `revoke_and_ack`.
7517-
|| self.context.has_pending_channel_update()
7526+
// Cleared upon receiving `revoke_and_ack`. Since we're not queiscent, as we just
7527+
// checked above, we intentionally don't disconnect our counterparty if we're waiting on
7528+
// a monitor update or signer request.
7529+
|| (self.context.is_waiting_on_peer_pending_channel_update()
7530+
&& !self.context.is_monitor_or_signer_pending_channel_update())
75187531
{
75197532
// This is the first tick we've seen after expecting to make forward progress.
75207533
self.context.sent_message_awaiting_response = Some(1);
@@ -9397,7 +9410,9 @@ impl<SP: Deref> FundedChannel<SP> where
93979410
);
93989411
debug_assert!(self.context.is_live());
93999412

9400-
if self.context.has_pending_channel_update() {
9413+
if self.context.is_waiting_on_peer_pending_channel_update()
9414+
|| self.context.is_monitor_or_signer_pending_channel_update()
9415+
{
94019416
return Err(ChannelError::Ignore(
94029417
"We cannot send `stfu` while state machine is pending".to_owned()
94039418
));
@@ -9458,10 +9473,11 @@ impl<SP: Deref> FundedChannel<SP> where
94589473
));
94599474
}
94609475

9461-
// We don't check `has_pending_channel_update` prior to setting the flag because it
9462-
// considers pending updates from either node. This means we may accept a counterparty
9463-
// `stfu` while they had pending updates, but that's fine as we won't send ours until
9464-
// _all_ pending updates complete, allowing the channel to become quiescent then.
9476+
// We don't check `is_waiting_on_peer_pending_channel_update` prior to setting the flag
9477+
// because it considers pending updates from either node. This means we may accept a
9478+
// counterparty `stfu` while they had pending updates, but that's fine as we won't send
9479+
// ours until _all_ pending updates complete, allowing the channel to become quiescent
9480+
// then.
94659481
self.context.channel_state.set_remote_stfu_sent();
94669482

94679483
let is_holder_initiator = if self.context.channel_state.is_awaiting_quiescence() {
@@ -9485,7 +9501,9 @@ impl<SP: Deref> FundedChannel<SP> where
94859501
// We were expecting to receive `stfu` because we already sent ours.
94869502
self.mark_response_received();
94879503

9488-
if self.context.has_pending_channel_update() {
9504+
if self.context.is_waiting_on_peer_pending_channel_update()
9505+
|| self.context.is_monitor_or_signer_pending_channel_update()
9506+
{
94899507
// Since we've already sent `stfu`, it should not be possible for one of our updates to
94909508
// be pending, so anything pending currently must be from a counterparty update.
94919509
return Err(ChannelError::WarnAndDisconnect(

lightning/src/ln/quiescence_tests.rs

+15-28
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,10 @@ fn allow_shutdown_while_awaiting_quiescence(local_shutdown: bool) {
180180
}
181181

182182
#[test]
183-
fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer() {
183+
fn test_quiescence_waits_for_async_signer_and_monitor_update() {
184184
// Test that quiescence:
185185
// a) considers an async signer when determining whether a pending channel update exists
186-
// b) tracks in-progress monitor updates until no longer quiescent
186+
// b) waits until pending monitor updates complete to send `stfu`/become quiescent
187187
let chanmon_cfgs = create_chanmon_cfgs(2);
188188
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
189189
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -244,29 +244,12 @@ fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer(
244244
let revoke_and_ack = find_msg!(msg_events, SendRevokeAndACK);
245245
let stfu = find_msg!(msg_events, SendStfu);
246246

247-
// While handling the last `revoke_and_ack` on nodes[0], we'll hold the monitor update and
248-
// become quiescent.
247+
// While handling the last `revoke_and_ack` on nodes[0], we'll hold the monitor update. We
248+
// cannot become quiescent until it completes.
249249
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
250250
nodes[0].node.handle_revoke_and_ack(node_id_1, &revoke_and_ack);
251251

252252
nodes[0].node.handle_stfu(node_id_1, &stfu);
253-
let stfu = get_event_msg!(&nodes[0], MessageSendEvent::SendStfu, node_id_1);
254-
nodes[1].node.handle_stfu(node_id_0, &stfu);
255-
256-
nodes[0].node.exit_quiescence(&node_id_1, &chan_id).unwrap();
257-
nodes[1].node.exit_quiescence(&node_id_0, &chan_id).unwrap();
258-
259-
// After exiting quiescence, we should be able to resume payments from nodes[0], but the monitor
260-
// update has yet to complete. Attempting to send a payment now will be delayed until the
261-
// monitor update completes.
262-
{
263-
let (route, payment_hash, _, payment_secret) =
264-
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
265-
let onion = RecipientOnionFields::secret_only(payment_secret);
266-
let payment_id = PaymentId(payment_hash.0);
267-
nodes[0].node.send_payment_with_route(route, payment_hash, onion, payment_id).unwrap();
268-
}
269-
check_added_monitors(&nodes[0], 0);
270253
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
271254

272255
// We have two updates pending:
@@ -276,17 +259,21 @@ fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer(
276259
chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone();
277260
let chain_monitor = &nodes[0].chain_monitor.chain_monitor;
278261
// One for the latest commitment transaction update from the last `revoke_and_ack`
279-
chain_monitor.channel_monitor_updated(chan_id, latest_update - 1).unwrap();
262+
chain_monitor.channel_monitor_updated(chan_id, latest_update).unwrap();
280263
expect_payment_sent(&nodes[0], preimage, None, true, true);
281264
// One for the commitment secret update from the last `revoke_and_ack`
282-
chain_monitor.channel_monitor_updated(chan_id, latest_update).unwrap();
265+
chain_monitor.channel_monitor_updated(chan_id, latest_update + 1).unwrap();
283266
}
284267

285-
// With the pending monitor updates complete, we'll see a new monitor update go out when freeing
286-
// the holding cells to send out the new HTLC.
287-
nodes[0].chain_monitor.complete_sole_pending_chan_update(&chan_id);
288-
let _ = get_htlc_update_msgs!(&nodes[0], node_id_1);
289-
check_added_monitors(&nodes[0], 1);
268+
// With the updates completed, we can now become quiescent.
269+
let stfu = get_event_msg!(&nodes[0], MessageSendEvent::SendStfu, node_id_1);
270+
nodes[1].node.handle_stfu(node_id_0, &stfu);
271+
272+
nodes[0].node.exit_quiescence(&node_id_1, &chan_id).unwrap();
273+
nodes[1].node.exit_quiescence(&node_id_0, &chan_id).unwrap();
274+
275+
// After exiting quiescence, we should be able to resume payments from nodes[0].
276+
send_payment(&nodes[0], &[&nodes[1]], payment_amount);
290277
}
291278

292279
#[test]

0 commit comments

Comments
 (0)