Skip to content

Avoid disconnect on message timeout while waiting on monitor/signer #3721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions lightning/src/ln/async_signer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::chain::ChannelMonitorUpdateStatus;
use crate::events::bump_transaction::WalletSource;
use crate::events::{ClosureReason, Event};
use crate::ln::chan_utils::ClosingTransaction;
use crate::ln::channel::DISCONNECT_PEER_AWAITING_RESPONSE_TICKS;
use crate::ln::channel_state::{ChannelDetails, ChannelShutdownState};
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
Expand Down Expand Up @@ -1091,3 +1092,130 @@ fn do_test_closing_signed(extra_closing_signed: bool, reconnect: bool) {
check_closed_event!(nodes[0], 1, ClosureReason::LocallyInitiatedCooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyInitiatedCooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
}

#[test]
fn test_no_disconnect_while_async_revoke_and_ack_expecting_remote_commitment_signed() {
// Nodes with async signers may be expecting to receive a `commitment_signed` from the
// counterparty even if a `revoke_and_ack` has yet to be sent due to an async signer. Test that
// we don't disconnect the async signer node due to not receiving the `commitment_signed` within
// the timeout while the `revoke_and_ack` is not ready.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;

let node_id_0 = nodes[0].node.get_our_node_id();
let node_id_1 = nodes[1].node.get_our_node_id();

let payment_amount = 1_000_000;
send_payment(&nodes[0], &[&nodes[1]], payment_amount * 4);

nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::ReleaseCommitmentSecret);

// We'll send a payment from both nodes to each other.
let (route1, payment_hash1, _, payment_secret1) =
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
let onion1 = RecipientOnionFields::secret_only(payment_secret1);
let payment_id1 = PaymentId(payment_hash1.0);
nodes[0].node.send_payment_with_route(route1, payment_hash1, onion1, payment_id1).unwrap();
check_added_monitors(&nodes[0], 1);

let (route2, payment_hash2, _, payment_secret2) =
get_route_and_payment_hash!(&nodes[1], &nodes[0], payment_amount);
let onion2 = RecipientOnionFields::secret_only(payment_secret2);
let payment_id2 = PaymentId(payment_hash2.0);
nodes[1].node.send_payment_with_route(route2, payment_hash2, onion2, payment_id2).unwrap();
check_added_monitors(&nodes[1], 1);

let update = get_htlc_update_msgs!(&nodes[0], node_id_1);
nodes[1].node.handle_update_add_htlc(node_id_0, &update.update_add_htlcs[0]);
nodes[1].node.handle_commitment_signed_batch_test(node_id_0, &update.commitment_signed);
check_added_monitors(&nodes[1], 1);

let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
nodes[0].node.handle_update_add_htlc(node_id_1, &update.update_add_htlcs[0]);
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
check_added_monitors(&nodes[0], 1);

// nodes[0] can only respond with a `revoke_and_ack`. The `commitment_signed` that would follow
// is blocked on receiving a counterparty `revoke_and_ack`, which nodes[1] is still pending on.
let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
check_added_monitors(&nodes[1], 1);

assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());

// nodes[0] will disconnect the counterparty as it's waiting on a `revoke_and_ack`.
// nodes[1] is waiting on a `commitment_signed`, but since it hasn't yet sent its own
// `revoke_and_ack`, it shouldn't disconnect yet.
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
nodes[0].node.timer_tick_occurred();
nodes[1].node.timer_tick_occurred();
}
let has_disconnect_event = |event| {
matches!(
event, MessageSendEvent::HandleError { action , .. }
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
)
};
assert!(nodes[0].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
}

#[test]
fn test_no_disconnect_while_async_commitment_signed_expecting_remote_revoke_and_ack() {
// Nodes with async signers may be expecting to receive a `revoke_and_ack` from the
// counterparty even if a `commitment_signed` has yet to be sent due to an async signer. Test
// that we don't disconnect the async signer node due to not receiving the `revoke_and_ack`
// within the timeout while the `commitment_signed` is not ready.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;

let node_id_0 = nodes[0].node.get_our_node_id();
let node_id_1 = nodes[1].node.get_our_node_id();

// Route a payment and attempt to claim it.
let payment_amount = 1_000_000;
let (preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], payment_amount);
nodes[1].node.claim_funds(preimage);
check_added_monitors(&nodes[1], 1);

// We'll disable signing counterparty commitments on the payment sender.
nodes[0].disable_channel_signer_op(&node_id_1, &chan_id, SignerOp::SignCounterpartyCommitment);

// After processing the `update_fulfill`, they'll only be able to send `revoke_and_ack` until
// the `commitment_signed` is no longer pending.
let update = get_htlc_update_msgs!(&nodes[1], node_id_0);
nodes[0].node.handle_update_fulfill_htlc(node_id_1, &update.update_fulfill_htlcs[0]);
nodes[0].node.handle_commitment_signed_batch_test(node_id_1, &update.commitment_signed);
check_added_monitors(&nodes[0], 1);

let revoke_and_ack = get_event_msg!(&nodes[0], MessageSendEvent::SendRevokeAndACK, node_id_1);
nodes[1].node.handle_revoke_and_ack(node_id_0, &revoke_and_ack);
check_added_monitors(&nodes[1], 1);

// The payment sender shouldn't disconnect the counterparty due to a missing `revoke_and_ack`
// because the `commitment_signed` isn't ready yet. The payment recipient may disconnect the
// sender because it doesn't have an async signer and it's expecting a timely
// `commitment_signed` response.
for _ in 0..DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
nodes[0].node.timer_tick_occurred();
nodes[1].node.timer_tick_occurred();
}
let has_disconnect_event = |event| {
matches!(
event, MessageSendEvent::HandleError { action , .. }
if matches!(action, msgs::ErrorAction::DisconnectPeerWithWarning { .. })
)
};
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
assert!(nodes[1].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));

expect_payment_sent(&nodes[0], preimage, None, false, false);
expect_payment_claimed!(nodes[1], payment_hash, payment_amount);
}
44 changes: 31 additions & 13 deletions lightning/src/ln/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3205,23 +3205,33 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
}
}

/// Returns whether a channel update is blocked on asynchronous infrastructure rather than
/// on the counterparty: either a `ChannelMonitor` update is still in progress, or an async
/// signer has yet to produce a pending `revoke_and_ack` or `commitment_signed`.
pub fn is_monitor_or_signer_pending_channel_update(&self) -> bool {
	if self.channel_state.is_monitor_update_in_progress() {
		return true;
	}
	self.signer_pending_revoke_and_ack || self.signer_pending_commitment_update
}

/// Checks whether the channel has any HTLC additions, HTLC removals, or fee updates that have
/// been sent by either side but not yet irrevocably committed on both commitments. Holding cell
/// updates are not considered because they haven't been sent to the peer yet.
///
/// This can be used to satisfy quiescence's requirement when sending `stfu`:
/// - MUST NOT send `stfu` if any of the sender's htlc additions, htlc removals
/// or fee updates are pending for either peer.
fn has_pending_channel_update(&self) -> bool {
///
/// Note that it is still possible for an update to be pending that's not captured here due to a
/// pending monitor update or signer request. `is_monitor_or_signer_pending_channel_update`
/// should also be checked in such cases.
fn is_waiting_on_peer_pending_channel_update(&self) -> bool {
// An update from the local/remote node may be pending on the remote/local commitment since
// they are not tracked within our state, so we rely on whether any `commitment_signed` or
// `revoke_and_ack` messages are owed.
//
// We check these flags first as they are more likely to be set.
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed
|| self.monitor_pending_revoke_and_ack || self.signer_pending_revoke_and_ack
|| self.monitor_pending_commitment_signed || self.signer_pending_commitment_update
{
if self.channel_state.is_awaiting_remote_revoke() || self.expecting_peer_commitment_signed {
return true;
}

Expand Down Expand Up @@ -7417,8 +7427,11 @@ impl<SP: Deref> FundedChannel<SP> where
|| self.context.channel_state.is_local_stfu_sent()
// Cleared upon receiving a message that triggers the end of quiescence.
|| self.context.channel_state.is_quiescent()
// Cleared upon receiving `revoke_and_ack`.
|| self.context.has_pending_channel_update()
// Cleared upon receiving `revoke_and_ack`. Since we're not quiescent, as we just
// checked above, we intentionally don't disconnect our counterparty if we're waiting on
// a monitor update or signer request.
|| (self.context.is_waiting_on_peer_pending_channel_update()
&& !self.context.is_monitor_or_signer_pending_channel_update())
{
// This is the first tick we've seen after expecting to make forward progress.
self.context.sent_message_awaiting_response = Some(1);
Expand Down Expand Up @@ -9306,7 +9319,9 @@ impl<SP: Deref> FundedChannel<SP> where
);
debug_assert!(self.context.is_live());

if self.context.has_pending_channel_update() {
if self.context.is_waiting_on_peer_pending_channel_update()
|| self.context.is_monitor_or_signer_pending_channel_update()
{
return Err(ChannelError::Ignore(
"We cannot send `stfu` while state machine is pending".to_owned()
));
Expand Down Expand Up @@ -9367,10 +9382,11 @@ impl<SP: Deref> FundedChannel<SP> where
));
}

// We don't check `has_pending_channel_update` prior to setting the flag because it
// considers pending updates from either node. This means we may accept a counterparty
// `stfu` while they had pending updates, but that's fine as we won't send ours until
// _all_ pending updates complete, allowing the channel to become quiescent then.
// We don't check `is_waiting_on_peer_pending_channel_update` prior to setting the flag
// because it considers pending updates from either node. This means we may accept a
// counterparty `stfu` while they had pending updates, but that's fine as we won't send
// ours until _all_ pending updates complete, allowing the channel to become quiescent
// then.
self.context.channel_state.set_remote_stfu_sent();

let is_holder_initiator = if self.context.channel_state.is_awaiting_quiescence() {
Expand All @@ -9394,7 +9410,9 @@ impl<SP: Deref> FundedChannel<SP> where
// We were expecting to receive `stfu` because we already sent ours.
self.mark_response_received();

if self.context.has_pending_channel_update() {
if self.context.is_waiting_on_peer_pending_channel_update()
|| self.context.is_monitor_or_signer_pending_channel_update()
{
// Since we've already sent `stfu`, it should not be possible for one of our updates to
// be pending, so anything pending currently must be from a counterparty update.
return Err(ChannelError::WarnAndDisconnect(
Expand Down
43 changes: 15 additions & 28 deletions lightning/src/ln/quiescence_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,10 @@ fn allow_shutdown_while_awaiting_quiescence(local_shutdown: bool) {
}

#[test]
fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer() {
fn test_quiescence_waits_for_async_signer_and_monitor_update() {
// Test that quiescence:
// a) considers an async signer when determining whether a pending channel update exists
// b) tracks in-progress monitor updates until no longer quiescent
// b) waits until pending monitor updates complete to send `stfu`/become quiescent
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
Expand Down Expand Up @@ -244,29 +244,12 @@ fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer(
let revoke_and_ack = find_msg!(msg_events, SendRevokeAndACK);
let stfu = find_msg!(msg_events, SendStfu);

// While handling the last `revoke_and_ack` on nodes[0], we'll hold the monitor update and
// become quiescent.
// While handling the last `revoke_and_ack` on nodes[0], we'll hold the monitor update. We
// cannot become quiescent until it completes.
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[0].node.handle_revoke_and_ack(node_id_1, &revoke_and_ack);

nodes[0].node.handle_stfu(node_id_1, &stfu);
let stfu = get_event_msg!(&nodes[0], MessageSendEvent::SendStfu, node_id_1);
nodes[1].node.handle_stfu(node_id_0, &stfu);

nodes[0].node.exit_quiescence(&node_id_1, &chan_id).unwrap();
nodes[1].node.exit_quiescence(&node_id_0, &chan_id).unwrap();

// After exiting quiescence, we should be able to resume payments from nodes[0], but the monitor
// update has yet to complete. Attempting to send a payment now will be delayed until the
// monitor update completes.
{
let (route, payment_hash, _, payment_secret) =
get_route_and_payment_hash!(&nodes[0], &nodes[1], payment_amount);
let onion = RecipientOnionFields::secret_only(payment_secret);
let payment_id = PaymentId(payment_hash.0);
nodes[0].node.send_payment_with_route(route, payment_hash, onion, payment_id).unwrap();
}
check_added_monitors(&nodes[0], 0);
assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());

// We have two updates pending:
Expand All @@ -276,17 +259,21 @@ fn test_quiescence_tracks_monitor_update_in_progress_and_waits_for_async_signer(
chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone();
let chain_monitor = &nodes[0].chain_monitor.chain_monitor;
// One for the latest commitment transaction update from the last `revoke_and_ack`
chain_monitor.channel_monitor_updated(chan_id, latest_update - 1).unwrap();
chain_monitor.channel_monitor_updated(chan_id, latest_update).unwrap();
expect_payment_sent(&nodes[0], preimage, None, true, true);
// One for the commitment secret update from the last `revoke_and_ack`
chain_monitor.channel_monitor_updated(chan_id, latest_update).unwrap();
chain_monitor.channel_monitor_updated(chan_id, latest_update + 1).unwrap();
}

// With the pending monitor updates complete, we'll see a new monitor update go out when freeing
// the holding cells to send out the new HTLC.
nodes[0].chain_monitor.complete_sole_pending_chan_update(&chan_id);
let _ = get_htlc_update_msgs!(&nodes[0], node_id_1);
check_added_monitors(&nodes[0], 1);
// With the updates completed, we can now become quiescent.
let stfu = get_event_msg!(&nodes[0], MessageSendEvent::SendStfu, node_id_1);
nodes[1].node.handle_stfu(node_id_0, &stfu);

nodes[0].node.exit_quiescence(&node_id_1, &chan_id).unwrap();
nodes[1].node.exit_quiescence(&node_id_0, &chan_id).unwrap();

// After exiting quiescence, we should be able to resume payments from nodes[0].
send_payment(&nodes[0], &[&nodes[1]], payment_amount);
}

#[test]
Expand Down
Loading