@@ -3860,3 +3860,225 @@ fn test_claim_to_closed_channel_blocks_claimed_event() {
3860
3860
nodes[ 1 ] . chain_monitor . complete_sole_pending_chan_update ( & chan_a. 2 ) ;
3861
3861
expect_payment_claimed ! ( nodes[ 1 ] , payment_hash, 1_000_000 ) ;
3862
3862
}
3863
+
3864
+ #[ test]
3865
+ #[ cfg( all( feature = "std" , not( target_os = "windows" ) ) ) ]
3866
+ fn test_single_channel_multiple_mpp ( ) {
3867
+ use std:: sync:: atomic:: { AtomicBool , Ordering } ;
3868
+
3869
+ // Test what happens when we attempt to claim an MPP with many parts that came to us through
3870
+ // the same channel with a synchronous persistence interface which has very high latency.
3871
+ //
3872
+ // Previously, if a `revoke_and_ack` came in while we were still running in
3873
+ // `ChannelManager::claim_payment` we'd end up hanging waiting to apply a
3874
+ // `ChannelMonitorUpdate` until after it completed. See the commit which introduced this test
3875
+ // for more info.
3876
+ let chanmon_cfgs = create_chanmon_cfgs ( 9 ) ;
3877
+ let node_cfgs = create_node_cfgs ( 9 , & chanmon_cfgs) ;
3878
+ let configs = [ None , None , None , None , None , None , None , None , None ] ;
3879
+ let node_chanmgrs = create_node_chanmgrs ( 9 , & node_cfgs, & configs) ;
3880
+ let mut nodes = create_network ( 9 , & node_cfgs, & node_chanmgrs) ;
3881
+
3882
+ let node_7_id = nodes[ 7 ] . node . get_our_node_id ( ) ;
3883
+ let node_8_id = nodes[ 8 ] . node . get_our_node_id ( ) ;
3884
+
3885
+ // Send an MPP payment in six parts along the path shown from top to bottom
3886
+ // 0
3887
+ // 1 2 3 4 5 6
3888
+ // 7
3889
+ // 8
3890
+ //
3891
+ // We can in theory reproduce this issue with fewer channels/HTLCs, but getting this test
3892
+ // robust is rather challenging. We rely on having the main test thread wait on locks held in
3893
+ // the background `claim_funds` thread and unlocking when the `claim_funds` thread completes a
3894
+ // single `ChannelMonitorUpdate`.
3895
+ // This thread calls `get_and_clear_pending_msg_events()` and `handle_revoke_and_ack()`, both
3896
+ // of which require `ChannelManager` locks, but we have to make sure this thread gets a chance
3897
+ // to be blocked on the mutexes before we let the background thread wake `claim_funds` so that
3898
+ // the mutex can switch to this main thread.
3899
+ // This relies on our locks being fair, but also on our threads getting runtime during the test
3900
+ // run, which can be pretty competitive. Thus we do a dumb dance to be as conservative as
3901
+ // possible - we have a background thread which completes a `ChannelMonitorUpdate` (by sending
3902
+ // into the `write_blocker` mpsc) but it doesn't run until a mpsc channel sends from this main
3903
+ // thread to the background thread, and then we let it sleep a while before we send the
3904
+ // `ChannelMonitorUpdate` unblocker.
3905
+ // Further, we give ourselves two chances each time, needing 4 HTLCs just to unlock our two
3906
+ // `ChannelManager` calls. We then need a few remaining HTLCs to actually trigger the bug, so
3907
+ // we use 6 HTLCs.
3908
+ // Finally, we do not run this test on Winblowz because it, somehow, in 2025, does not implement
3909
+ // actual preemptive multitasking and thinks that cooperative multitasking somehow is
3910
+ // acceptable in the 21st century, let alone a quarter of the way into it.
3911
+ const MAX_THREAD_INIT_TIME : std:: time:: Duration = std:: time:: Duration :: from_secs ( 1 ) ;
3912
+
3913
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 1 , 100_000 , 0 ) ;
3914
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 2 , 100_000 , 0 ) ;
3915
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 3 , 100_000 , 0 ) ;
3916
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 4 , 100_000 , 0 ) ;
3917
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 5 , 100_000 , 0 ) ;
3918
+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 6 , 100_000 , 0 ) ;
3919
+
3920
+ create_announced_chan_between_nodes_with_value ( & nodes, 1 , 7 , 100_000 , 0 ) ;
3921
+ create_announced_chan_between_nodes_with_value ( & nodes, 2 , 7 , 100_000 , 0 ) ;
3922
+ create_announced_chan_between_nodes_with_value ( & nodes, 3 , 7 , 100_000 , 0 ) ;
3923
+ create_announced_chan_between_nodes_with_value ( & nodes, 4 , 7 , 100_000 , 0 ) ;
3924
+ create_announced_chan_between_nodes_with_value ( & nodes, 5 , 7 , 100_000 , 0 ) ;
3925
+ create_announced_chan_between_nodes_with_value ( & nodes, 6 , 7 , 100_000 , 0 ) ;
3926
+ create_announced_chan_between_nodes_with_value ( & nodes, 7 , 8 , 1_000_000 , 0 ) ;
3927
+
3928
+ let ( mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash ! ( & nodes[ 0 ] , nodes[ 8 ] , 50_000_000 ) ;
3929
+
3930
+ send_along_route_with_secret ( & nodes[ 0 ] , route, & [ & [ & nodes[ 1 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 2 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 3 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 4 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 5 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 6 ] , & nodes[ 7 ] , & nodes[ 8 ] ] ] , 50_000_000 , payment_hash, payment_secret) ;
3931
+
3932
+ let ( do_a_write, blocker) = std:: sync:: mpsc:: sync_channel ( 0 ) ;
3933
+ * nodes[ 8 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = Some ( blocker) ;
3934
+
3935
+ // Until we can use `std::thread::scope` we have to unsafe { turn off the borrow checker }.
3936
+ // We do this by casting a pointer to a `TestChannelManager` to a pointer to a
3937
+ // `TestChannelManager` with different (in this case 'static) lifetime.
3938
+ // This is even suggested in the second example at
3939
+ // https://doc.rust-lang.org/std/mem/fn.transmute.html#examples
3940
+ let claim_node: & ' static TestChannelManager < ' static , ' static > =
3941
+ unsafe { std:: mem:: transmute ( nodes[ 8 ] . node as & TestChannelManager ) } ;
3942
+ let thrd = std:: thread:: spawn ( move || {
3943
+ // Initiate the claim in a background thread as it will immediately block waiting on the
3944
+ // `write_blocker` we set above.
3945
+ claim_node. claim_funds ( payment_preimage) ;
3946
+ } ) ;
3947
+
3948
+ // First unlock one monitor so that we have a pending
3949
+ // `update_fulfill_htlc`/`commitment_signed` pair to pass to our counterparty.
3950
+ do_a_write. send ( ( ) ) . unwrap ( ) ;
3951
+
3952
+ // Then fetch the `update_fulfill_htlc`/`commitment_signed`. Note that the
3953
+ // `get_and_clear_pending_msg_events` will immediately hang trying to take a peer lock which
3954
+ // `claim_funds` is holding. Thus, we release a second write after a small sleep in the
3955
+ // background to give `claim_funds` a chance to step forward, unblocking
3956
+ // `get_and_clear_pending_msg_events`.
3957
+ let do_a_write_background = do_a_write. clone ( ) ;
3958
+ let block_thrd2 = AtomicBool :: new ( true ) ;
3959
+ let block_thrd2_read: & ' static AtomicBool = unsafe { std:: mem:: transmute ( & block_thrd2) } ;
3960
+ let thrd2 = std:: thread:: spawn ( move || {
3961
+ while block_thrd2_read. load ( Ordering :: Acquire ) {
3962
+ std:: thread:: yield_now ( ) ;
3963
+ }
3964
+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
3965
+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
3966
+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
3967
+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
3968
+ } ) ;
3969
+ block_thrd2. store ( false , Ordering :: Release ) ;
3970
+ let first_updates = get_htlc_update_msgs ( & nodes[ 8 ] , & nodes[ 7 ] . node . get_our_node_id ( ) ) ;
3971
+ thrd2. join ( ) . unwrap ( ) ;
3972
+
3973
+ // Disconnect node 7 from all its upstream peers (nodes 1-6) so it doesn't bother to fail the HTLCs back
3974
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 1 ] . node . get_our_node_id ( ) ) ;
3975
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 2 ] . node . get_our_node_id ( ) ) ;
3976
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 3 ] . node . get_our_node_id ( ) ) ;
3977
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 4 ] . node . get_our_node_id ( ) ) ;
3978
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 5 ] . node . get_our_node_id ( ) ) ;
3979
+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 6 ] . node . get_our_node_id ( ) ) ;
3980
+
3981
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & first_updates. update_fulfill_htlcs [ 0 ] ) ;
3982
+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
3983
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 1 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
3984
+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & first_updates. commitment_signed ) ;
3985
+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
3986
+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
3987
+
3988
+ // Now, handle the `revoke_and_ack` from node 7. Note that `claim_funds` is still blocked on
3989
+ // our peer lock, so we have to release a write to let it process.
3990
+ // After this call completes, the channel previously would be locked up and should not be able
3991
+ // to make further progress.
3992
+ let do_a_write_background = do_a_write. clone ( ) ;
3993
+ let block_thrd3 = AtomicBool :: new ( true ) ;
3994
+ let block_thrd3_read: & ' static AtomicBool = unsafe { std:: mem:: transmute ( & block_thrd3) } ;
3995
+ let thrd3 = std:: thread:: spawn ( move || {
3996
+ while block_thrd3_read. load ( Ordering :: Acquire ) {
3997
+ std:: thread:: yield_now ( ) ;
3998
+ }
3999
+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
4000
+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
4001
+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
4002
+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
4003
+ } ) ;
4004
+ block_thrd3. store ( false , Ordering :: Release ) ;
4005
+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
4006
+ thrd3. join ( ) . unwrap ( ) ;
4007
+ assert ! ( !thrd. is_finished( ) ) ;
4008
+
4009
+ let thrd4 = std:: thread:: spawn ( move || {
4010
+ do_a_write. send ( ( ) ) . unwrap ( ) ;
4011
+ do_a_write. send ( ( ) ) . unwrap ( ) ;
4012
+ } ) ;
4013
+
4014
+ thrd4. join ( ) . unwrap ( ) ;
4015
+ thrd. join ( ) . unwrap ( ) ;
4016
+
4017
+ expect_payment_claimed ! ( nodes[ 8 ] , payment_hash, 50_000_000 ) ;
4018
+
4019
+ // At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the
4020
+ // above `revoke_and_ack`.
4021
+ check_added_monitors ( & nodes[ 8 ] , 7 ) ;
4022
+
4023
+ // Now drive everything to the end, at least as far as node 7 is concerned...
4024
+ * nodes[ 8 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = None ;
4025
+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4026
+ check_added_monitors ( & nodes[ 8 ] , 1 ) ;
4027
+
4028
+ let ( updates, raa) = get_updates_and_revoke ( & nodes[ 8 ] , & nodes[ 7 ] . node . get_our_node_id ( ) ) ;
4029
+
4030
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
4031
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 2 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4032
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 1 ] ) ;
4033
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 3 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4034
+ let mut next_source = 4 ;
4035
+ if let Some ( update) = updates. update_fulfill_htlcs . get ( 2 ) {
4036
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, update) ;
4037
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 4 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4038
+ next_source += 1 ;
4039
+ }
4040
+
4041
+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & updates. commitment_signed ) ;
4042
+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4043
+ if updates. update_fulfill_htlcs . get ( 2 ) . is_some ( ) {
4044
+ check_added_monitors ( & nodes[ 7 ] , 5 ) ;
4045
+ } else {
4046
+ check_added_monitors ( & nodes[ 7 ] , 4 ) ;
4047
+ }
4048
+
4049
+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
4050
+
4051
+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
4052
+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4053
+ check_added_monitors ( & nodes[ 8 ] , 2 ) ;
4054
+
4055
+ let ( updates, raa) = get_updates_and_revoke ( & nodes[ 8 ] , & node_7_id) ;
4056
+
4057
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
4058
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4059
+ next_source += 1 ;
4060
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 1 ] ) ;
4061
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4062
+ next_source += 1 ;
4063
+ if let Some ( update) = updates. update_fulfill_htlcs . get ( 2 ) {
4064
+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, update) ;
4065
+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4066
+ }
4067
+
4068
+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & updates. commitment_signed ) ;
4069
+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4070
+ if updates. update_fulfill_htlcs . get ( 2 ) . is_some ( ) {
4071
+ check_added_monitors ( & nodes[ 7 ] , 5 ) ;
4072
+ } else {
4073
+ check_added_monitors ( & nodes[ 7 ] , 4 ) ;
4074
+ }
4075
+
4076
+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
4077
+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
4078
+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4079
+ check_added_monitors ( & nodes[ 8 ] , 2 ) ;
4080
+
4081
+ let raa = get_event_msg ! ( nodes[ 8 ] , MessageSendEvent :: SendRevokeAndACK , node_7_id) ;
4082
+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4083
+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
4084
+ }
0 commit comments