From 0e63c40c74c1fb891489f07c42c246692d692357 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 12:12:32 -0400 Subject: [PATCH 01/28] Split up some tests that have many variants Helps when debugging to know which variants failed. --- lightning/src/ln/monitor_tests.rs | 60 +++++++++++++++++++++++++++++-- lightning/src/ln/payment_tests.rs | 26 +++++++++++++- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index efd2084a38e..1a68e5165a8 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3801,27 +3801,83 @@ fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: b } #[test] -fn test_lost_timeout_monitor_events() { +fn test_lost_timeout_monitor_events_a() { do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, false, false); +} +#[test] +fn test_lost_timeout_monitor_events_b() { do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, true, false); +} +#[test] +fn test_lost_timeout_monitor_events_c() { do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, false, false); +} +#[test] +fn test_lost_timeout_monitor_events_d() { do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, true, false); +} +#[test] +fn test_lost_timeout_monitor_events_e() { do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, false, false); +} +#[test] +fn test_lost_timeout_monitor_events_f() { do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, true, false); +} +#[test] +fn test_lost_timeout_monitor_events_g() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, false, false); +} +#[test] +fn test_lost_timeout_monitor_events_h() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, true, false); +} +#[test] +fn test_lost_timeout_monitor_events_i() { 
do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, false, false); +} +#[test] +fn test_lost_timeout_monitor_events_j() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, true, false); - +} +#[test] +fn test_lost_timeout_monitor_events_k() { do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, false, true); +} +#[test] +fn test_lost_timeout_monitor_events_l() { do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, true, true); +} +#[test] +fn test_lost_timeout_monitor_events_m() { do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, false, true); +} +#[test] +fn test_lost_timeout_monitor_events_n() { do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, true, true); +} +#[test] +fn test_lost_timeout_monitor_events_o() { do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, false, true); +} +#[test] +fn test_lost_timeout_monitor_events_p() { do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, true, true); +} +#[test] +fn test_lost_timeout_monitor_events_q() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, false, true); +} +#[test] +fn test_lost_timeout_monitor_events_r() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, true, true); +} +#[test] +fn test_lost_timeout_monitor_events_s() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, false, true); +} +#[test] +fn test_lost_timeout_monitor_events_t() { do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, true, true); } diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index be52459a872..6d8b727f81a 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -1423,15 +1423,39 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload( } #[test] -fn test_dup_htlc_onchain_doesnt_fail_on_reload() { +fn 
test_dup_htlc_onchain_doesnt_fail_on_reload_a() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true, true); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_b() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true, false); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_c() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, false, false); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_d() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, true, true); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_e() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, true, false); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_f() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, false, false); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_g() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, true, true); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_h() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, true, false); +} +#[test] +fn test_dup_htlc_onchain_doesnt_fail_on_reload_i() { do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, false, false); } From 5719e3dac21164dd29b5d868102af61012caefef Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Thu, 19 Mar 2026 16:08:46 -0400 Subject: [PATCH 02/28] Remove unnecessary pending_monitor_events clone --- lightning/src/chain/channelmonitor.rs | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 0173e988082..fe09c700ba1 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -68,8 +68,8 @@ use crate::util::byte_utils; use crate::util::logger::{Logger, WithContext}; use crate::util::persist::MonitorName; use crate::util::ser::{ - MaybeReadable, Readable, ReadableArgs, 
RequiredWrapper, UpgradableRequired, Writeable, Writer, - U48, + Iterable, MaybeReadable, Readable, ReadableArgs, RequiredWrapper, UpgradableRequired, + Writeable, Writer, U48, }; #[allow(unused_imports)] @@ -1719,24 +1719,23 @@ pub(crate) fn write_chanmon_internal( channel_monitor.lockdown_from_offchain.write(writer)?; channel_monitor.holder_tx_signed.write(writer)?; - // If we have a `HolderForceClosedWithInfo` event, we need to write the `HolderForceClosed` for backwards compatibility. - let pending_monitor_events = - match channel_monitor.pending_monitor_events.iter().find(|ev| match ev { - MonitorEvent::HolderForceClosedWithInfo { .. } => true, - _ => false, - }) { - Some(MonitorEvent::HolderForceClosedWithInfo { outpoint, .. }) => { - let mut pending_monitor_events = channel_monitor.pending_monitor_events.clone(); - pending_monitor_events.push(MonitorEvent::HolderForceClosed(*outpoint)); - pending_monitor_events - }, - _ => channel_monitor.pending_monitor_events.clone(), - }; + // If we have a `HolderForceClosedWithInfo` event, we need to write the `HolderForceClosed` + // for backwards compatibility. + let holder_force_closed_compat = channel_monitor.pending_monitor_events.iter().find_map(|ev| { + if let MonitorEvent::HolderForceClosedWithInfo { outpoint, .. 
} = ev { + Some(MonitorEvent::HolderForceClosed(*outpoint)) + } else { + None + } + }); + let pending_monitor_events = Iterable( + channel_monitor.pending_monitor_events.iter().chain(holder_force_closed_compat.as_ref()), + ); write_tlv_fields!(writer, { (1, channel_monitor.funding_spend_confirmed, option), (3, channel_monitor.htlcs_resolved_on_chain, required_vec), - (5, pending_monitor_events, required_vec), + (5, pending_monitor_events, required), // Equivalent to required_vec because Iterable also writes as WithoutLength (7, channel_monitor.funding_spend_seen, required), (9, channel_monitor.counterparty_node_id, required), (11, channel_monitor.confirmed_commitment_tx_counterparty_output, option), From 68edf63597fa5b2fcc9e179c189c57620474ae86 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 15:26:43 -0400 Subject: [PATCH 03/28] Add persistent_monitor_events flag to monitors/manager Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. 
As a first step towards this, here we persist a flag in the ChannelManager and ChannelMonitors indicating whether this new feature is enabled. It will be used in upcoming commits to maintain compatibility and create an upgrade/downgrade path between LDK versions. --- lightning/src/chain/channelmonitor.rs | 20 +++++++++++++++++++ lightning/src/ln/channelmanager.rs | 28 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index fe09c700ba1..fdc07c9576f 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -1281,6 +1281,12 @@ pub(crate) struct ChannelMonitorImpl { // block/transaction-connected events and *not* during block/transaction-disconnected events, // we further MUST NOT generate events during block/transaction-disconnection. pending_monitor_events: Vec, + /// When set, monitor events are retained until explicitly acked rather than cleared on read. + /// + /// Allows the ChannelManager to reconstruct pending HTLC state by replaying monitor events on + /// startup, and make the monitor responsible for both off- and on-chain payment resolution. Will + /// be always set once support for this feature is complete. + persistent_events_enabled: bool, pub(super) pending_events: Vec, pub(super) is_processing_pending_events: bool, @@ -1732,8 +1738,12 @@ pub(crate) fn write_chanmon_internal( channel_monitor.pending_monitor_events.iter().chain(holder_force_closed_compat.as_ref()), ); + // Only write `persistent_events_enabled` if it's set to true, as it's an even TLV. 
+ let persistent_events_enabled = channel_monitor.persistent_events_enabled.then_some(()); + write_tlv_fields!(writer, { (1, channel_monitor.funding_spend_confirmed, option), + (2, persistent_events_enabled, option), (3, channel_monitor.htlcs_resolved_on_chain, required_vec), (5, pending_monitor_events, required), // Equivalent to required_vec because Iterable also writes as WithoutLength (7, channel_monitor.funding_spend_seen, required), @@ -1938,6 +1948,7 @@ impl ChannelMonitor { payment_preimages: new_hash_map(), pending_monitor_events: Vec::new(), + persistent_events_enabled: false, pending_events: Vec::new(), is_processing_pending_events: false, @@ -6692,8 +6703,10 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP let mut is_manual_broadcast = RequiredWrapper(None); let mut funding_seen_onchain = RequiredWrapper(None); let mut best_block_previous_blocks = None; + let mut persistent_events_enabled: Option<()> = None; read_tlv_fields!(reader, { (1, funding_spend_confirmed, option), + (2, persistent_events_enabled, option), (3, htlcs_resolved_on_chain, optional_vec), (5, pending_monitor_events, optional_vec), (7, funding_spend_seen, option), @@ -6720,6 +6733,12 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP best_block.previous_blocks = previous_blocks; } + #[cfg(not(any(feature = "_test_utils", test)))] + if persistent_events_enabled.is_some() { + // This feature isn't supported yet so error if the writer expected it to be. + return Err(DecodeError::InvalidValue) + } + // Note that `payment_preimages_with_info` was added (and is always written) in LDK 0.1, so // we can use it to determine if this monitor was last written by LDK 0.1 or later. 
let written_by_0_1_or_later = payment_preimages_with_info.is_some(); @@ -6868,6 +6887,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP payment_preimages, pending_monitor_events: pending_monitor_events.unwrap(), + persistent_events_enabled: persistent_events_enabled.is_some(), pending_events, is_processing_pending_events: false, diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 660f61f0f57..06296af443c 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -2950,6 +2950,15 @@ pub struct ChannelManager< /// offer they resolve to to the given one. pub testing_dnssec_proof_offer_resolution_override: Mutex>, + /// When set, monitors will repeatedly provide an event back to the `ChannelManager` on restart + /// until the event is explicitly acknowledged as processed. + /// + /// Allows us to reconstruct pending HTLC state by replaying monitor events on startup, rather + /// than from complexly polling and reconciling Channel{Monitor} APIs, as well as move the + /// responsibility of off-chain payment resolution from the Channel to the monitor. Will be + /// always set once support is complete. + persistent_monitor_events: bool, + #[cfg(test)] pub(super) entropy_source: ES, #[cfg(not(test))] @@ -3681,6 +3690,8 @@ impl< logger, + persistent_monitor_events: false, + #[cfg(feature = "_test_utils")] testing_dnssec_proof_offer_resolution_override: Mutex::new(new_hash_map()), } @@ -18235,6 +18246,9 @@ impl< } } + // Only write `persistent_events_enabled` if it's set to true, as it's an even TLV. 
+ let persistent_monitor_events = self.persistent_monitor_events.then_some(()); + write_tlv_fields!(writer, { (1, pending_outbound_payments_no_retry, required), (2, pending_intercepted_htlcs, option), @@ -18247,6 +18261,7 @@ impl< (9, htlc_purposes, required_vec), (10, legacy_in_flight_monitor_updates, option), (11, self.probing_cookie_secret, required), + (12, persistent_monitor_events, option), (13, htlc_onion_fields, optional_vec), (14, decode_update_add_htlcs_opt, option), (15, self.inbound_payment_id_secret, required), @@ -18346,6 +18361,7 @@ pub(super) struct ChannelManagerData { forward_htlcs_legacy: HashMap>, pending_intercepted_htlcs_legacy: HashMap, decode_update_add_htlcs_legacy: HashMap>, + persistent_monitor_events: bool, // The `ChannelManager` version that was written. version: u8, } @@ -18532,6 +18548,7 @@ impl<'a, ES: EntropySource, SP: SignerProvider, L: Logger> let mut peer_storage_dir: Option)>> = None; let mut async_receive_offer_cache: AsyncReceiveOfferCache = AsyncReceiveOfferCache::new(); let mut best_block_previous_blocks = None; + let mut persistent_monitor_events: Option<()> = None; read_tlv_fields!(reader, { (1, pending_outbound_payments_no_retry, option), (2, pending_intercepted_htlcs_legacy, option), @@ -18544,6 +18561,7 @@ impl<'a, ES: EntropySource, SP: SignerProvider, L: Logger> (9, claimable_htlc_purposes, optional_vec), (10, legacy_in_flight_monitor_updates, option), (11, probing_cookie_secret, option), + (12, persistent_monitor_events, option), (13, amountless_claimable_htlc_onion_fields, optional_vec), (14, decode_update_add_htlcs_legacy, option), (15, inbound_payment_id_secret, option), @@ -18553,6 +18571,12 @@ impl<'a, ES: EntropySource, SP: SignerProvider, L: Logger> (23, best_block_previous_blocks, option), }); + #[cfg(not(any(feature = "_test_utils", test)))] + if persistent_monitor_events.is_some() { + // This feature isn't supported yet so error if the writer expected it to be. 
+ return Err(DecodeError::InvalidValue); + } + // Merge legacy pending_outbound_payments fields into a single HashMap. // Priority: pending_outbound_payments (TLV 3) > pending_outbound_payments_no_retry (TLV 1) // > pending_outbound_payments_compat (non-TLV legacy) @@ -18672,6 +18696,7 @@ impl<'a, ES: EntropySource, SP: SignerProvider, L: Logger> peer_storage_dir: peer_storage_dir.unwrap_or_default(), async_receive_offer_cache, version, + persistent_monitor_events: persistent_monitor_events.is_some(), }) } } @@ -18972,6 +18997,7 @@ impl< mut in_flight_monitor_updates, peer_storage_dir, async_receive_offer_cache, + persistent_monitor_events, version: _version, } = data; @@ -20234,6 +20260,8 @@ impl< logger: args.logger, config: RwLock::new(args.config), + persistent_monitor_events, + #[cfg(feature = "_test_utils")] testing_dnssec_proof_offer_resolution_override: Mutex::new(new_hash_map()), }; From ab51f1738779327cdbe516f5ae4f8ebf1b8aec1a Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Wed, 18 Mar 2026 15:08:54 -0400 Subject: [PATCH 04/28] Add helper to push monitor events Cleans up the next commit --- lightning/src/chain/chainmonitor.rs | 53 +++++++-------------------- lightning/src/chain/channelmonitor.rs | 26 +++++++++---- 2 files changed, 32 insertions(+), 47 deletions(-) diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 125f206bbea..9162c2d1321 100644 --- a/lightning/src/chain/chainmonitor.rs +++ b/lightning/src/chain/chainmonitor.rs @@ -366,9 +366,6 @@ pub struct ChainMonitor< fee_estimator: F, persister: P, _entropy_source: ES, - /// "User-provided" (ie persistence-completion/-failed) [`MonitorEvent`]s. These came directly - /// from the user and not from a [`ChannelMonitor`]. - pending_monitor_events: Mutex, PublicKey)>>, /// The best block height seen, used as a proxy for the passage of time. 
highest_chain_height: AtomicUsize, @@ -436,7 +433,6 @@ where logger, fee_estimator: feeest, _entropy_source, - pending_monitor_events: Mutex::new(Vec::new()), highest_chain_height: AtomicUsize::new(0), event_notifier: Arc::clone(&event_notifier), persister: AsyncPersister { persister, event_notifier }, @@ -655,7 +651,6 @@ where fee_estimator: feeest, persister, _entropy_source, - pending_monitor_events: Mutex::new(Vec::new()), highest_chain_height: AtomicUsize::new(0), event_notifier: Arc::new(Notifier::new()), pending_send_only_events: Mutex::new(Vec::new()), @@ -800,16 +795,11 @@ where return Ok(()); } let funding_txo = monitor_data.monitor.get_funding_txo(); - self.pending_monitor_events.lock().unwrap().push(( + monitor_data.monitor.push_monitor_event(MonitorEvent::Completed { funding_txo, channel_id, - vec![MonitorEvent::Completed { - funding_txo, - channel_id, - monitor_update_id: monitor_data.monitor.get_latest_update_id(), - }], - monitor_data.monitor.get_counterparty_node_id(), - )); + monitor_update_id: monitor_data.monitor.get_latest_update_id(), + }); self.event_notifier.notify(); Ok(()) @@ -822,14 +812,11 @@ where pub fn force_channel_monitor_updated(&self, channel_id: ChannelId, monitor_update_id: u64) { let monitors = self.monitors.read().unwrap(); let monitor = &monitors.get(&channel_id).unwrap().monitor; - let counterparty_node_id = monitor.get_counterparty_node_id(); - let funding_txo = monitor.get_funding_txo(); - self.pending_monitor_events.lock().unwrap().push(( - funding_txo, + monitor.push_monitor_event(MonitorEvent::Completed { + funding_txo: monitor.get_funding_txo(), channel_id, - vec![MonitorEvent::Completed { funding_txo, channel_id, monitor_update_id }], - counterparty_node_id, - )); + monitor_update_id, + }); self.event_notifier.notify(); } @@ -1264,21 +1251,13 @@ where // The channel is post-close (funding spend seen, lockdown, or // holder tx signed). 
Return InProgress so ChannelManager freezes // the channel until the force-close MonitorEvents are processed. - // Push a Completed event into pending_monitor_events so it gets - // picked up after the per-monitor events in the next - // release_pending_monitor_events call. - let funding_txo = monitor.get_funding_txo(); - let channel_id = monitor.channel_id(); - self.pending_monitor_events.lock().unwrap().push(( - funding_txo, - channel_id, - vec![MonitorEvent::Completed { - funding_txo, - channel_id, - monitor_update_id: monitor.get_latest_update_id(), - }], - monitor.get_counterparty_node_id(), - )); + // Push a Completed event into the monitor so it gets picked up + // in the next release_pending_monitor_events call. + monitor.push_monitor_event(MonitorEvent::Completed { + funding_txo: monitor.get_funding_txo(), + channel_id: monitor.channel_id(), + monitor_update_id: monitor.get_latest_update_id(), + }); log_debug!( logger, "Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)", @@ -1663,10 +1642,6 @@ where )); } } - // Drain pending_monitor_events (which includes deferred post-close - // completions) after per-monitor events so that force-close - // MonitorEvents are processed by ChannelManager first. - pending_monitor_events.extend(self.pending_monitor_events.lock().unwrap().split_off(0)); pending_monitor_events } } diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index fdc07c9576f..fa81910011f 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -183,6 +183,10 @@ impl Readable for ChannelMonitorUpdate { } } +fn push_monitor_event(pending_monitor_events: &mut Vec, event: MonitorEvent) { + pending_monitor_events.push(event); +} + /// An event to be processed by the ChannelManager. 
#[derive(Clone, PartialEq, Eq)] pub enum MonitorEvent { @@ -226,8 +230,6 @@ pub enum MonitorEvent { }, } impl_writeable_tlv_based_enum_upgradable_legacy!(MonitorEvent, - // Note that Completed is currently never serialized to disk as it is generated only in - // ChainMonitor. (0, Completed) => { (0, funding_txo, required), (2, monitor_update_id, required), @@ -2166,6 +2168,10 @@ impl ChannelMonitor { self.inner.lock().unwrap().get_and_clear_pending_monitor_events() } + pub(super) fn push_monitor_event(&self, event: MonitorEvent) { + self.inner.lock().unwrap().push_monitor_event(event); + } + /// Processes [`SpendableOutputs`] events produced from each [`ChannelMonitor`] upon maturity. /// /// For channels featuring anchor outputs, this method will also process [`BumpTransaction`] @@ -3891,7 +3897,7 @@ impl ChannelMonitorImpl { outpoint: funding_outpoint, channel_id: self.channel_id, }; - self.pending_monitor_events.push(event); + push_monitor_event(&mut self.pending_monitor_events, event); } // Although we aren't signing the transaction directly here, the transaction will be signed @@ -4552,6 +4558,10 @@ impl ChannelMonitorImpl { &self.outputs_to_watch } + fn push_monitor_event(&mut self, event: MonitorEvent) { + push_monitor_event(&mut self.pending_monitor_events, event); + } + fn get_and_clear_pending_monitor_events(&mut self) -> Vec { let mut ret = Vec::new(); mem::swap(&mut ret, &mut self.pending_monitor_events); @@ -5608,7 +5618,7 @@ impl ChannelMonitorImpl { ); log_info!(logger, "Channel closed by funding output spend in txid {txid}"); if !self.funding_spend_seen { - self.pending_monitor_events.push(MonitorEvent::CommitmentTxConfirmed(())); + self.push_monitor_event(MonitorEvent::CommitmentTxConfirmed(())); } self.funding_spend_seen = true; @@ -5783,7 +5793,7 @@ impl ChannelMonitorImpl { log_debug!(logger, "HTLC {} failure update in {} has got enough confirmations to be passed upstream", &payment_hash, entry.txid); - 
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { + self.push_monitor_event(MonitorEvent::HTLCEvent(HTLCUpdate { payment_hash, payment_preimage: None, source, @@ -5893,7 +5903,7 @@ impl ChannelMonitorImpl { log_error!(logger, "Failing back HTLC {} upstream to preserve the \ channel as the forward HTLC hasn't resolved and our backward HTLC \ expires soon at {}", log_bytes!(htlc.payment_hash.0), inbound_htlc_expiry); - self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { + push_monitor_event(&mut self.pending_monitor_events, MonitorEvent::HTLCEvent(HTLCUpdate { source: source.clone(), payment_preimage: None, payment_hash: htlc.payment_hash, @@ -6310,7 +6320,7 @@ impl ChannelMonitorImpl { }, }); self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); - self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { + push_monitor_event(&mut self.pending_monitor_events, MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), payment_hash, @@ -6334,7 +6344,7 @@ impl ChannelMonitorImpl { }, }); self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); - self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { + push_monitor_event(&mut self.pending_monitor_events, MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), payment_hash, From 0e26c76c7ee1e4a047ac5801ece6d0fcfada066c Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Wed, 18 Mar 2026 17:18:26 -0400 Subject: [PATCH 05/28] Rename pending_monitor_events to _legacy This field will be deprecated in upcoming commits when we start persisting MonitorEvent ids alongside the MonitorEvents. 
--- lightning/src/chain/channelmonitor.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index fa81910011f..74a5a07c0d2 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -1736,7 +1736,7 @@ pub(crate) fn write_chanmon_internal( None } }); - let pending_monitor_events = Iterable( + let pending_monitor_events_legacy = Iterable( channel_monitor.pending_monitor_events.iter().chain(holder_force_closed_compat.as_ref()), ); @@ -1747,7 +1747,7 @@ pub(crate) fn write_chanmon_internal( (1, channel_monitor.funding_spend_confirmed, option), (2, persistent_events_enabled, option), (3, channel_monitor.htlcs_resolved_on_chain, required_vec), - (5, pending_monitor_events, required), // Equivalent to required_vec because Iterable also writes as WithoutLength + (5, pending_monitor_events_legacy, required), // Equivalent to required_vec because Iterable also writes as WithoutLength (7, channel_monitor.funding_spend_seen, required), (9, channel_monitor.counterparty_node_id, required), (11, channel_monitor.confirmed_commitment_tx_counterparty_output, option), @@ -6642,16 +6642,16 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP } } - let pending_monitor_events_len: u64 = Readable::read(reader)?; - let mut pending_monitor_events = Some( - Vec::with_capacity(cmp::min(pending_monitor_events_len as usize, MAX_ALLOC_SIZE / (32 + 8*3)))); - for _ in 0..pending_monitor_events_len { + let pending_monitor_events_legacy_len: u64 = Readable::read(reader)?; + let mut pending_monitor_events_legacy = Some( + Vec::with_capacity(cmp::min(pending_monitor_events_legacy_len as usize, MAX_ALLOC_SIZE / (32 + 8*3)))); + for _ in 0..pending_monitor_events_legacy_len { let ev = match ::read(reader)? 
{ 0 => MonitorEvent::HTLCEvent(Readable::read(reader)?), 1 => MonitorEvent::HolderForceClosed(outpoint), _ => return Err(DecodeError::InvalidValue) }; - pending_monitor_events.as_mut().unwrap().push(ev); + pending_monitor_events_legacy.as_mut().unwrap().push(ev); } let pending_events_len: u64 = Readable::read(reader)?; @@ -6718,7 +6718,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP (1, funding_spend_confirmed, option), (2, persistent_events_enabled, option), (3, htlcs_resolved_on_chain, optional_vec), - (5, pending_monitor_events, optional_vec), + (5, pending_monitor_events_legacy, optional_vec), (7, funding_spend_seen, option), (9, counterparty_node_id, option), (11, confirmed_commitment_tx_counterparty_output, option), @@ -6771,11 +6771,11 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP // `HolderForceClosedWithInfo` replaced `HolderForceClosed` in v0.0.122. If we have both // events, we can remove the `HolderForceClosed` event and just keep the `HolderForceClosedWithInfo`. - if let Some(ref mut pending_monitor_events) = pending_monitor_events { - if pending_monitor_events.iter().any(|e| matches!(e, MonitorEvent::HolderForceClosed(_))) && - pending_monitor_events.iter().any(|e| matches!(e, MonitorEvent::HolderForceClosedWithInfo { .. })) + if let Some(ref mut evs) = pending_monitor_events_legacy { + if evs.iter().any(|e| matches!(e, MonitorEvent::HolderForceClosed(_))) && + evs.iter().any(|e| matches!(e, MonitorEvent::HolderForceClosedWithInfo { .. 
})) { - pending_monitor_events.retain(|e| !matches!(e, MonitorEvent::HolderForceClosed(_))); + evs.retain(|e| !matches!(e, MonitorEvent::HolderForceClosed(_))); } } @@ -6896,7 +6896,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP current_holder_commitment_number, payment_preimages, - pending_monitor_events: pending_monitor_events.unwrap(), + pending_monitor_events: pending_monitor_events_legacy.unwrap(), persistent_events_enabled: persistent_events_enabled.is_some(), pending_events, is_processing_pending_events: false, From 5b0f9ced32ba77e458fe3ad48681919444424738 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 13:37:38 -0400 Subject: [PATCH 06/28] Add chain::Watch ack_monitor_event API Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we add an as-yet-unused API to chain::Watch to allow the ChannelManager to tell the a ChannelMonitor that a MonitorEvent has been irrevocably processed and can be deleted. 
--- fuzz/src/chanmon_consistency.rs | 5 +++++ lightning/src/chain/chainmonitor.rs | 24 ++++++++++++++++++++++++ lightning/src/chain/channelmonitor.rs | 6 ++++++ lightning/src/chain/mod.rs | 14 ++++++++++++++ lightning/src/util/test_utils.rs | 6 +++++- 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/fuzz/src/chanmon_consistency.rs b/fuzz/src/chanmon_consistency.rs index 4ee0de17621..de00fe142e6 100644 --- a/fuzz/src/chanmon_consistency.rs +++ b/fuzz/src/chanmon_consistency.rs @@ -41,6 +41,7 @@ use lightning::chain; use lightning::chain::chaininterface::{ BroadcasterInterface, ConfirmationTarget, FeeEstimator, TransactionType, }; +use lightning::chain::chainmonitor::MonitorEventSource; use lightning::chain::channelmonitor::{ChannelMonitor, MonitorEvent}; use lightning::chain::transaction::OutPoint; use lightning::chain::{ @@ -367,6 +368,10 @@ impl chain::Watch for TestChainMonitor { ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { return self.chain_monitor.release_pending_monitor_events(); } + + fn ack_monitor_event(&self, source: MonitorEventSource) { + self.chain_monitor.ack_monitor_event(source); + } } struct KeyProvider { diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 9162c2d1321..51227684cde 100644 --- a/lightning/src/chain/chainmonitor.rs +++ b/lightning/src/chain/chainmonitor.rs @@ -66,6 +66,21 @@ use core::iter::Cycle; use core::ops::Deref; use core::sync::atomic::{AtomicUsize, Ordering}; +/// Identifies the source of a [`MonitorEvent`] for acknowledgment via +/// [`chain::Watch::ack_monitor_event`] once the event has been processed. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct MonitorEventSource { + /// The event ID assigned by the [`ChannelMonitor`]. + pub event_id: u64, + /// The channel from which the [`MonitorEvent`] originated. 
+ pub channel_id: ChannelId, +} + +impl_writeable_tlv_based!(MonitorEventSource, { + (1, event_id, required), + (3, channel_id, required), +}); + /// A pending operation queued for later execution when `ChainMonitor` is in deferred mode. enum PendingMonitorOp { /// A new monitor to insert and persist. @@ -1644,6 +1659,15 @@ where } pending_monitor_events } + + fn ack_monitor_event(&self, source: MonitorEventSource) { + let monitors = self.monitors.read().unwrap(); + if let Some(monitor_state) = monitors.get(&source.channel_id) { + monitor_state.monitor.ack_monitor_event(source.event_id); + } else { + debug_assert!(false, "Ack'd monitor events should always have a corresponding monitor"); + } + } } impl< diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 74a5a07c0d2..6cf053c51bf 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2172,6 +2172,12 @@ impl ChannelMonitor { self.inner.lock().unwrap().push_monitor_event(event); } + /// Removes a [`MonitorEvent`] by its event ID, acknowledging that it has been processed. + /// Generally called by [`chain::Watch::ack_monitor_event`]. + pub fn ack_monitor_event(&self, _event_id: u64) { + // TODO: once events have ids, remove the corresponding event here + } + /// Processes [`SpendableOutputs`] events produced from each [`ChannelMonitor`] upon maturity. 
/// /// For channels featuring anchor outputs, this method will also process [`BumpTransaction`] diff --git a/lightning/src/chain/mod.rs b/lightning/src/chain/mod.rs index 9692558cf7c..59400cc8da9 100644 --- a/lightning/src/chain/mod.rs +++ b/lightning/src/chain/mod.rs @@ -18,6 +18,7 @@ use bitcoin::network::Network; use bitcoin::script::{Script, ScriptBuf}; use bitcoin::secp256k1::PublicKey; +use crate::chain::chainmonitor::MonitorEventSource; use crate::chain::channelmonitor::{ ChannelMonitor, ChannelMonitorUpdate, MonitorEvent, ANTI_REORG_DELAY, }; @@ -425,6 +426,15 @@ pub trait Watch { fn release_pending_monitor_events( &self, ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)>; + + /// Acknowledges and removes a [`MonitorEvent`] previously returned by + /// [`Watch::release_pending_monitor_events`] by its event ID. + /// + /// Once acknowledged, the event will no longer be returned by future calls to + /// [`Watch::release_pending_monitor_events`] and will not be replayed on restart. + /// + /// Events may be acknowledged in any order. 
+ fn ack_monitor_event(&self, source: MonitorEventSource); } impl + ?Sized, W: Deref> @@ -447,6 +457,10 @@ impl + ?Sized, W: Der ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { self.deref().release_pending_monitor_events() } + + fn ack_monitor_event(&self, source: MonitorEventSource) { + self.deref().ack_monitor_event(source) + } } /// The `Filter` trait defines behavior for indicating chain activity of interest pertaining to diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs index 57f9ba6b22f..c31a378be1a 100644 --- a/lightning/src/util/test_utils.rs +++ b/lightning/src/util/test_utils.rs @@ -15,7 +15,7 @@ use crate::chain::chaininterface; #[cfg(any(test, feature = "_externalize_tests"))] use crate::chain::chaininterface::FEERATE_FLOOR_SATS_PER_KW; use crate::chain::chaininterface::{ConfirmationTarget, TransactionType}; -use crate::chain::chainmonitor::{ChainMonitor, Persist}; +use crate::chain::chainmonitor::{ChainMonitor, MonitorEventSource, Persist}; use crate::chain::channelmonitor::{ ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateStep, MonitorEvent, }; @@ -734,6 +734,10 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { } return self.chain_monitor.release_pending_monitor_events(); } + + fn ack_monitor_event(&self, source: MonitorEventSource) { + self.chain_monitor.ack_monitor_event(source); + } } #[cfg(any(test, feature = "_externalize_tests"))] From b23c1a2e509b50cc3e85556d82b84577e33682c5 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Wed, 18 Mar 2026 17:24:04 -0400 Subject: [PATCH 07/28] Add monitor event ids Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. 
Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This simplifies things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. To allow the ChannelManager to ack specific monitor events once they are resolved in upcoming commits, here we give each MonitorEvent a corresponding unique id. It's implemented in such a way that we can delete legacy monitor event code in the future when the new persistent monitor events flag is enabled by default. --- fuzz/src/chanmon_consistency.rs | 2 +- lightning/src/chain/chainmonitor.rs | 2 +- lightning/src/chain/channelmonitor.rs | 200 +++++++++++++----- lightning/src/chain/mod.rs | 8 +- lightning/src/ln/chanmon_update_fail_tests.rs | 6 +- lightning/src/ln/channelmanager.rs | 2 +- lightning/src/util/test_utils.rs | 6 +- 7 files changed, 161 insertions(+), 65 deletions(-) diff --git a/fuzz/src/chanmon_consistency.rs b/fuzz/src/chanmon_consistency.rs index de00fe142e6..f2488999626 100644 --- a/fuzz/src/chanmon_consistency.rs +++ b/fuzz/src/chanmon_consistency.rs @@ -365,7 +365,7 @@ impl chain::Watch for TestChainMonitor { fn release_pending_monitor_events( &self, - ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { + ) -> Vec<(OutPoint, ChannelId, Vec<(u64, MonitorEvent)>, PublicKey)> { return self.chain_monitor.release_pending_monitor_events(); } diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 51227684cde..bc11f3cf591 100644 --- a/lightning/src/chain/chainmonitor.rs +++ 
b/lightning/src/chain/chainmonitor.rs @@ -1637,7 +1637,7 @@ where fn release_pending_monitor_events( &self, - ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { + ) -> Vec<(OutPoint, ChannelId, Vec<(u64, MonitorEvent)>, PublicKey)> { for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() { let _ = self.channel_monitor_updated(channel_id, update_id); } diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 6cf053c51bf..c48512ec228 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -183,8 +183,13 @@ impl Readable for ChannelMonitorUpdate { } } -fn push_monitor_event(pending_monitor_events: &mut Vec, event: MonitorEvent) { - pending_monitor_events.push(event); +fn push_monitor_event( + pending_monitor_events: &mut Vec<(u64, MonitorEvent)>, event: MonitorEvent, + next_monitor_event_id: &mut u64, +) { + let id = *next_monitor_event_id; + *next_monitor_event_id += 1; + pending_monitor_events.push((id, event)); } /// An event to be processed by the ChannelManager. @@ -1282,13 +1287,14 @@ pub(crate) struct ChannelMonitorImpl { // Note that because the `event_lock` in `ChainMonitor` is only taken in // block/transaction-connected events and *not* during block/transaction-disconnected events, // we further MUST NOT generate events during block/transaction-disconnection. - pending_monitor_events: Vec, + pending_monitor_events: Vec<(u64, MonitorEvent)>, /// When set, monitor events are retained until explicitly acked rather than cleared on read. /// /// Allows the ChannelManager to reconstruct pending HTLC state by replaying monitor events on /// startup, and make the monitor responsible for both off- and on-chain payment resolution. Will /// be always set once support for this feature is complete. 
persistent_events_enabled: bool, + next_monitor_event_id: u64, pub(super) pending_events: Vec, pub(super) is_processing_pending_events: bool, @@ -1669,32 +1675,38 @@ pub(crate) fn write_chanmon_internal( writer.write_all(&payment_preimage.0[..])?; } - writer.write_all( - &(channel_monitor - .pending_monitor_events - .iter() - .filter(|ev| match ev { - MonitorEvent::HTLCEvent(_) => true, - MonitorEvent::HolderForceClosed(_) => true, - MonitorEvent::HolderForceClosedWithInfo { .. } => true, - _ => false, - }) - .count() as u64) - .to_be_bytes(), - )?; - for event in channel_monitor.pending_monitor_events.iter() { - match event { - MonitorEvent::HTLCEvent(upd) => { - 0u8.write(writer)?; - upd.write(writer)?; - }, - MonitorEvent::HolderForceClosed(_) => 1u8.write(writer)?, - // `HolderForceClosedWithInfo` replaced `HolderForceClosed` in v0.0.122. To keep - // backwards compatibility, we write a `HolderForceClosed` event along with the - // `HolderForceClosedWithInfo` event. This is deduplicated in the reader. - MonitorEvent::HolderForceClosedWithInfo { .. } => 1u8.write(writer)?, - _ => {}, // Covered in the TLV writes below + if !channel_monitor.persistent_events_enabled { + writer.write_all( + &(channel_monitor + .pending_monitor_events + .iter() + .filter(|(_, ev)| match ev { + MonitorEvent::HTLCEvent(_) => true, + MonitorEvent::HolderForceClosed(_) => true, + MonitorEvent::HolderForceClosedWithInfo { .. } => true, + _ => false, + }) + .count() as u64) + .to_be_bytes(), + )?; + for (_, event) in channel_monitor.pending_monitor_events.iter() { + match event { + MonitorEvent::HTLCEvent(upd) => { + 0u8.write(writer)?; + upd.write(writer)?; + }, + MonitorEvent::HolderForceClosed(_) => 1u8.write(writer)?, + // `HolderForceClosedWithInfo` replaced `HolderForceClosed` in v0.0.122. To keep + // backwards compatibility, we write a `HolderForceClosed` event along with the + // `HolderForceClosedWithInfo` event. This is deduplicated in the reader. 
+ MonitorEvent::HolderForceClosedWithInfo { .. } => 1u8.write(writer)?, + _ => {}, // Covered in the TLV writes below + } } + } else { + // If `persistent_events_enabled` is set, we'll write the events with their event ids in the + // TLV section below. + writer.write_all(&(0u64).to_be_bytes())?; } writer.write_all(&(channel_monitor.pending_events.len() as u64).to_be_bytes())?; @@ -1729,25 +1741,40 @@ pub(crate) fn write_chanmon_internal( // If we have a `HolderForceClosedWithInfo` event, we need to write the `HolderForceClosed` // for backwards compatibility. - let holder_force_closed_compat = channel_monitor.pending_monitor_events.iter().find_map(|ev| { - if let MonitorEvent::HolderForceClosedWithInfo { outpoint, .. } = ev { - Some(MonitorEvent::HolderForceClosed(*outpoint)) - } else { - None - } - }); - let pending_monitor_events_legacy = Iterable( - channel_monitor.pending_monitor_events.iter().chain(holder_force_closed_compat.as_ref()), - ); + let holder_force_closed_compat = + channel_monitor.pending_monitor_events.iter().find_map(|(_, ev)| { + if let MonitorEvent::HolderForceClosedWithInfo { outpoint, .. } = ev { + Some(MonitorEvent::HolderForceClosed(*outpoint)) + } else { + None + } + }); + let pending_monitor_events_legacy = if !channel_monitor.persistent_events_enabled { + Some(Iterable( + channel_monitor + .pending_monitor_events + .iter() + .map(|(_, ev)| ev) + .chain(holder_force_closed_compat.as_ref()), + )) + } else { + None + }; // Only write `persistent_events_enabled` if it's set to true, as it's an even TLV. 
let persistent_events_enabled = channel_monitor.persistent_events_enabled.then_some(()); + let pending_mon_evs_with_ids = if persistent_events_enabled.is_some() { + Some(Iterable(channel_monitor.pending_monitor_events.iter())) + } else { + None + }; write_tlv_fields!(writer, { (1, channel_monitor.funding_spend_confirmed, option), (2, persistent_events_enabled, option), (3, channel_monitor.htlcs_resolved_on_chain, required_vec), - (5, pending_monitor_events_legacy, required), // Equivalent to required_vec because Iterable also writes as WithoutLength + (4, pending_mon_evs_with_ids, option), + (5, pending_monitor_events_legacy, option), // Equivalent to optional_vec because Iterable also writes as WithoutLength (7, channel_monitor.funding_spend_seen, required), (9, channel_monitor.counterparty_node_id, required), (11, channel_monitor.confirmed_commitment_tx_counterparty_output, option), @@ -1767,6 +1794,7 @@ pub(crate) fn write_chanmon_internal( (35, channel_monitor.is_manual_broadcast, required), (37, channel_monitor.funding_seen_onchain, required), (39, channel_monitor.best_block.previous_blocks, required), + (41, channel_monitor.next_monitor_event_id, required), }); Ok(()) @@ -1951,6 +1979,7 @@ impl ChannelMonitor { payment_preimages: new_hash_map(), pending_monitor_events: Vec::new(), persistent_events_enabled: false, + next_monitor_event_id: 0, pending_events: Vec::new(), is_processing_pending_events: false, @@ -2164,7 +2193,7 @@ impl ChannelMonitor { /// Get the list of HTLCs who's status has been updated on chain. This should be called by /// ChannelManager via [`chain::Watch::release_pending_monitor_events`]. 
- pub fn get_and_clear_pending_monitor_events(&self) -> Vec { + pub fn get_and_clear_pending_monitor_events(&self) -> Vec<(u64, MonitorEvent)> { self.inner.lock().unwrap().get_and_clear_pending_monitor_events() } @@ -2178,6 +2207,20 @@ impl ChannelMonitor { // TODO: once events have ids, remove the corresponding event here } + /// Copies [`MonitorEvent`] state from `other` into `self`. + /// Used in tests to align transient runtime state before equality comparison after a + /// serialization round-trip. + #[cfg(any(test, feature = "_test_utils"))] + pub fn copy_monitor_event_state(&self, other: &ChannelMonitor) { + let (pending, next_id) = { + let other_inner = other.inner.lock().unwrap(); + (other_inner.pending_monitor_events.clone(), other_inner.next_monitor_event_id) + }; + let mut self_inner = self.inner.lock().unwrap(); + self_inner.pending_monitor_events = pending; + self_inner.next_monitor_event_id = next_id; + } + /// Processes [`SpendableOutputs`] events produced from each [`ChannelMonitor`] upon maturity. 
/// /// For channels featuring anchor outputs, this method will also process [`BumpTransaction`] @@ -3903,7 +3946,7 @@ impl ChannelMonitorImpl { outpoint: funding_outpoint, channel_id: self.channel_id, }; - push_monitor_event(&mut self.pending_monitor_events, event); + push_monitor_event(&mut self.pending_monitor_events, event, &mut self.next_monitor_event_id); } // Although we aren't signing the transaction directly here, the transaction will be signed @@ -4494,12 +4537,16 @@ impl ChannelMonitorImpl { "Failing HTLC from late counterparty commitment update immediately \ (funding spend already confirmed)" ); - self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { - payment_hash, - payment_preimage: None, - source: source.clone(), - htlc_value_satoshis, - })); + push_monitor_event( + &mut self.pending_monitor_events, + MonitorEvent::HTLCEvent(HTLCUpdate { + payment_hash, + payment_preimage: None, + source: source.clone(), + htlc_value_satoshis, + }), + &mut self.next_monitor_event_id, + ); self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC { commitment_tx_output_idx: None, resolving_txid: Some(confirmed_txid), @@ -4565,10 +4612,14 @@ impl ChannelMonitorImpl { } fn push_monitor_event(&mut self, event: MonitorEvent) { - push_monitor_event(&mut self.pending_monitor_events, event); + push_monitor_event( + &mut self.pending_monitor_events, + event, + &mut self.next_monitor_event_id, + ); } - fn get_and_clear_pending_monitor_events(&mut self) -> Vec { + fn get_and_clear_pending_monitor_events(&mut self) -> Vec<(u64, MonitorEvent)> { let mut ret = Vec::new(); mem::swap(&mut ret, &mut self.pending_monitor_events); ret @@ -5896,7 +5947,7 @@ impl ChannelMonitorImpl { continue; } let duplicate_event = self.pending_monitor_events.iter().any( - |update| if let &MonitorEvent::HTLCEvent(ref upd) = update { + |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == *source } else { false }); if duplicate_event { @@ -5914,7 +5965,7 @@ 
impl ChannelMonitorImpl { payment_preimage: None, payment_hash: htlc.payment_hash, htlc_value_satoshis: Some(htlc.amount_msat / 1000), - })); + }), &mut self.next_monitor_event_id); } } } @@ -6313,7 +6364,7 @@ impl ChannelMonitorImpl { if let Some((source, payment_hash, amount_msat)) = payment_data { if accepted_preimage_claim { if !self.pending_monitor_events.iter().any( - |update| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == source } else { false }) { + |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == source } else { false }) { self.onchain_events_awaiting_threshold_conf.push(OnchainEventEntry { txid: tx.compute_txid(), height, @@ -6331,11 +6382,11 @@ impl ChannelMonitorImpl { payment_preimage: Some(payment_preimage), payment_hash, htlc_value_satoshis: Some(amount_msat / 1000), - })); + }), &mut self.next_monitor_event_id); } } else if offered_preimage_claim { if !self.pending_monitor_events.iter().any( - |update| if let &MonitorEvent::HTLCEvent(ref upd) = update { + |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == source } else { false }) { self.onchain_events_awaiting_threshold_conf.push(OnchainEventEntry { @@ -6355,7 +6406,7 @@ impl ChannelMonitorImpl { payment_preimage: Some(payment_preimage), payment_hash, htlc_value_satoshis: Some(amount_msat / 1000), - })); + }), &mut self.next_monitor_event_id); } } else { self.onchain_events_awaiting_threshold_conf.retain(|ref entry| { @@ -6720,10 +6771,13 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP let mut funding_seen_onchain = RequiredWrapper(None); let mut best_block_previous_blocks = None; let mut persistent_events_enabled: Option<()> = None; + let mut next_monitor_event_id: Option = None; + let mut pending_mon_evs_with_ids: Option> = None; read_tlv_fields!(reader, { (1, funding_spend_confirmed, option), (2, persistent_events_enabled, option), (3, htlcs_resolved_on_chain, optional_vec), + (4, 
pending_mon_evs_with_ids, optional_vec), (5, pending_monitor_events_legacy, optional_vec), (7, funding_spend_seen, option), (9, counterparty_node_id, option), @@ -6744,6 +6798,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP (35, is_manual_broadcast, (default_value, false)), (37, funding_seen_onchain, (default_value, true)), (39, best_block_previous_blocks, option), // Added and always set in 0.3 + (41, next_monitor_event_id, option), }); if let Some(previous_blocks) = best_block_previous_blocks { best_block.previous_blocks = previous_blocks; @@ -6785,6 +6840,22 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP } } + // If persistent events are enabled, use the events with their persisted IDs from TLV 4. + // Otherwise, use the legacy events from TLV 5 and assign sequential IDs. + let (next_monitor_event_id, pending_monitor_events): (u64, Vec<(u64, MonitorEvent)>) = + if persistent_events_enabled.is_some() { + let evs = pending_mon_evs_with_ids.unwrap_or_default() + .into_iter().map(|ev| (ev.0, ev.1)).collect(); + (next_monitor_event_id.unwrap_or(0), evs) + } else if let Some(events) = pending_monitor_events_legacy { + let next_id = next_monitor_event_id.unwrap_or(events.len() as u64); + let evs = events.into_iter().enumerate() + .map(|(i, ev)| (i as u64, ev)).collect(); + (next_id, evs) + } else { + (next_monitor_event_id.unwrap_or(0), Vec::new()) + }; + let channel_parameters = channel_parameters.unwrap_or_else(|| { onchain_tx_handler.channel_parameters().clone() }); @@ -6902,8 +6973,9 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP current_holder_commitment_number, payment_preimages, - pending_monitor_events: pending_monitor_events_legacy.unwrap(), + pending_monitor_events, persistent_events_enabled: persistent_events_enabled.is_some(), + next_monitor_event_id, pending_events, is_processing_pending_events: false, @@ -6952,6 +7024,22 @@ impl<'a, 'b, ES: 
EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP } } +/// Deserialization wrapper for reading a `(u64, MonitorEvent)`. +/// Necessary because we can't deserialize a (Readable, MaybeReadable) tuple due to trait +/// conflicts. +struct ReadableIdMonitorEvent(u64, MonitorEvent); + +impl MaybeReadable for ReadableIdMonitorEvent { + fn read(reader: &mut R) -> Result, DecodeError> { + let id: u64 = Readable::read(reader)?; + let event_opt: Option = MaybeReadable::read(reader)?; + match event_opt { + Some(ev) => Ok(Some(ReadableIdMonitorEvent(id, ev))), + None => Ok(None), + } + } +} + #[cfg(test)] pub(super) fn dummy_monitor( channel_id: ChannelId, wrap_signer: impl FnOnce(crate::sign::InMemorySigner) -> S, diff --git a/lightning/src/chain/mod.rs b/lightning/src/chain/mod.rs index 59400cc8da9..b7ff2cb917c 100644 --- a/lightning/src/chain/mod.rs +++ b/lightning/src/chain/mod.rs @@ -417,6 +417,10 @@ pub trait Watch { /// Returns any monitor events since the last call. Subsequent calls must only return new /// events. /// + /// Each event comes with a corresponding id. Once the event is processed, call + /// [`Watch::ack_monitor_event`] with the corresponding id and channel id. Unacknowledged events + /// will be re-provided by this method after startup. + /// /// Note that after any block- or transaction-connection calls to a [`ChannelMonitor`], no /// further events may be returned here until the [`ChannelMonitor`] has been fully persisted /// to disk. @@ -425,7 +429,7 @@ pub trait Watch { /// [`MonitorEvent::Completed`] here, see [`ChannelMonitorUpdateStatus::InProgress`]. fn release_pending_monitor_events( &self, - ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)>; + ) -> Vec<(OutPoint, ChannelId, Vec<(u64, MonitorEvent)>, PublicKey)>; /// Acknowledges and removes a [`MonitorEvent`] previously returned by /// [`Watch::release_pending_monitor_events`] by its event ID. 
@@ -454,7 +458,7 @@ impl + ?Sized, W: Der fn release_pending_monitor_events( &self, - ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { + ) -> Vec<(OutPoint, ChannelId, Vec<(u64, MonitorEvent)>, PublicKey)> { self.deref().release_pending_monitor_events() } diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index dd799c2c27c..796b66830d1 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -5016,7 +5016,7 @@ fn native_async_persist() { let completed_persist = async_chain_monitor.release_pending_monitor_events(); assert_eq!(completed_persist.len(), 1); assert_eq!(completed_persist[0].2.len(), 1); - assert!(matches!(completed_persist[0].2[0], MonitorEvent::Completed { .. })); + assert!(matches!(completed_persist[0].2[0].1, MonitorEvent::Completed { .. })); // Now test two async `ChannelMonitorUpdate`s in flight at once, completing them in-order but // separately. @@ -5064,7 +5064,7 @@ fn native_async_persist() { let completed_persist = async_chain_monitor.release_pending_monitor_events(); assert_eq!(completed_persist.len(), 1); assert_eq!(completed_persist[0].2.len(), 1); - assert!(matches!(completed_persist[0].2[0], MonitorEvent::Completed { .. })); + assert!(matches!(completed_persist[0].2[0].1, MonitorEvent::Completed { .. })); // Finally, test two async `ChanelMonitorUpdate`s in flight at once, completing them // out-of-order and ensuring that no `MonitorEvent::Completed` is generated until they are both @@ -5110,7 +5110,7 @@ fn native_async_persist() { let completed_persist = async_chain_monitor.release_pending_monitor_events(); assert_eq!(completed_persist.len(), 1); assert_eq!(completed_persist[0].2.len(), 1); - if let MonitorEvent::Completed { monitor_update_id, .. } = &completed_persist[0].2[0] { + if let (_, MonitorEvent::Completed { monitor_update_id, .. 
}) = &completed_persist[0].2[0] { assert_eq!(*monitor_update_id, 4); } else { panic!(); diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 06296af443c..2a62b4540b9 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13538,7 +13538,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ for (funding_outpoint, channel_id, mut monitor_events, counterparty_node_id) in pending_monitor_events.drain(..) { - for monitor_event in monitor_events.drain(..) { + for (_event_id, monitor_event) in monitor_events.drain(..) { match monitor_event { MonitorEvent::HTLCEvent(htlc_update) => { let logger = WithContext::from( diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs index c31a378be1a..0961f31cfe6 100644 --- a/lightning/src/util/test_utils.rs +++ b/lightning/src/util/test_utils.rs @@ -649,6 +649,7 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { ) .unwrap() .1; + new_monitor.copy_monitor_event_state(&monitor); assert!(new_monitor == monitor); self.latest_monitor_update_id .lock() @@ -710,6 +711,9 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { // it so it doesn't leak into the rest of the test. let failed_back = monitor.inner.lock().unwrap().failed_back_htlc_ids.clone(); new_monitor.inner.lock().unwrap().failed_back_htlc_ids = failed_back; + // The deserialized monitor will reset the monitor event state, so copy it from the live + // monitor before comparing. 
+ new_monitor.copy_monitor_event_state(&monitor); if let Some(chan_id) = self.expect_monitor_round_trip_fail.lock().unwrap().take() { assert_eq!(chan_id, channel_id); assert!(new_monitor != *monitor); @@ -723,7 +727,7 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { fn release_pending_monitor_events( &self, - ) -> Vec<(OutPoint, ChannelId, Vec, PublicKey)> { + ) -> Vec<(OutPoint, ChannelId, Vec<(u64, MonitorEvent)>, PublicKey)> { // Auto-flush pending operations so that the ChannelManager can pick up monitor // completion events. When not in deferred mode the queue is empty so this only // costs a lock acquisition. It ensures standard test helpers (route_payment, etc.) From 657b2e6d8b5bab9bd1a29f3df4748cfcd2f8b634 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 16:42:19 -0400 Subject: [PATCH 08/28] Add MonitorUpdateCompletionAction::AckMonitorEvents Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. 
Here we add a MonitorUpdateCompletionAction that will allow the ChannelManager to tell a monitor that an event is complete, upon completion of a particular ChannelMonitorUpdate. For example, this will be used when an HTLC is irrevocably failed backwards post-channel-close, to delete the corresponding MonitorEvent::HTLCEvent. --- lightning/src/ln/channelmanager.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 2a62b4540b9..69e3caebe04 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -42,6 +42,7 @@ use crate::chain::chaininterface::{ BroadcasterInterface, ConfirmationTarget, FeeEstimator, LowerBoundedFeeEstimator, TransactionType, }; +use crate::chain::chainmonitor::MonitorEventSource; use crate::chain::channelmonitor::{ ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateStep, MonitorEvent, WithChannelMonitor, ANTI_REORG_DELAY, CLTV_CLAIM_BUFFER, HTLC_FAIL_BACK_BUFFER, @@ -1492,6 +1493,10 @@ pub(crate) enum MonitorUpdateCompletionAction { blocking_action: RAAMonitorUpdateBlockingAction, downstream_channel_id: ChannelId, }, + /// Indicates that one or more [`MonitorEvent`]s should be acknowledged via + /// [`chain::Watch::ack_monitor_event`] once the associated [`ChannelMonitorUpdate`] has been + /// durably persisted. + AckMonitorEvents { monitor_events_to_ack: Vec }, } impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, @@ -1513,6 +1518,9 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, (0, event, upgradable_option), (1, downstream_counterparty_and_funding_outpoint, upgradable_required), }, + (3, AckMonitorEvents) => { + (0, monitor_events_to_ack, required_vec), + }, ); /// Result of attempting to resume a channel after a monitor update completes while locks are held. @@ -10373,6 +10381,11 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ Some(blocking_action), ); }, + MonitorUpdateCompletionAction::AckMonitorEvents { monitor_events_to_ack } => { + for source in monitor_events_to_ack { + self.chain_monitor.ack_monitor_event(source); + } + }, } } From 51861adf01758bc716cbc4c3eaa42b508a130270 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 13:45:21 -0400 Subject: [PATCH 09/28] Ack all monitor events besides HTLCUpdate Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we take care of the monitor events that are trivial to ACK, since all except HTLCEvents can be ACK'd immediately after the event is handled and don't need to be re-processed on startup. --- lightning/src/ln/channelmanager.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 69e3caebe04..9161279545a 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13551,7 +13551,8 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ for (funding_outpoint, channel_id, mut monitor_events, counterparty_node_id) in pending_monitor_events.drain(..) { - for (_event_id, monitor_event) in monitor_events.drain(..) { + for (event_id, monitor_event) in monitor_events.drain(..) { + let monitor_event_source = MonitorEventSource { event_id, channel_id }; match monitor_event { MonitorEvent::HTLCEvent(htlc_update) => { let logger = WithContext::from( @@ -13634,6 +13635,9 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ failed_channels.push((Err(e), counterparty_node_id)); } } + // Channel close monitor events do not need to be replayed on startup because we + // already check the monitors to see if the channel is closed. + self.chain_monitor.ack_monitor_event(monitor_event_source); }, MonitorEvent::CommitmentTxConfirmed(_) => { let per_peer_state = self.per_peer_state.read().unwrap(); @@ -13655,6 +13659,9 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ failed_channels.push((Err(e), counterparty_node_id)); } } + // Channel close monitor events do not need to be replayed on startup because we + // already check the monitors to see if the channel is closed. + self.chain_monitor.ack_monitor_event(monitor_event_source); }, MonitorEvent::Completed { channel_id, monitor_update_id, .. } => { self.channel_monitor_updated( @@ -13662,6 +13669,7 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ Some(monitor_update_id), &counterparty_node_id, ); + self.chain_monitor.ack_monitor_event(monitor_event_source); }, } } From 1e980267d5a485862bead9f187cc6af331f5d7dc Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 14:44:59 -0400 Subject: [PATCH 10/28] Fix replay of monitor events for outbounds Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. If an outbound payment resolved on-chain and we get a monitor event for it, we want to mark that monitor event as resolved once the user has processed a PaymentSent or a PaymentFailed event. 
--- lightning/src/ln/channelmanager.rs | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 9161279545a..2332622cf48 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1545,12 +1545,29 @@ enum PostMonitorUpdateChanResume { }, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Eq)] pub(crate) struct PaymentCompleteUpdate { counterparty_node_id: PublicKey, channel_funding_outpoint: OutPoint, channel_id: ChannelId, htlc_id: SentHTLCId, + /// The id of the [`MonitorEvent`] that triggered this update. + /// + /// Used when the [`Event`] corresponding to this update's [`EventCompletionAction`] has been + /// processed by the user, to remove the [`MonitorEvent`] from the [`ChannelMonitor`] and prevent + /// it from being re-provided on startup. + monitor_event_id: Option, +} + +impl PartialEq for PaymentCompleteUpdate { + fn eq(&self, other: &Self) -> bool { + self.counterparty_node_id == other.counterparty_node_id + && self.channel_funding_outpoint == other.channel_funding_outpoint + && self.channel_id == other.channel_id + && self.htlc_id == other.htlc_id + // monitor_event_id is excluded: it's metadata about the event source, + // not part of the semantic identity of the payment resolution. + } } impl_writeable_tlv_based!(PaymentCompleteUpdate, { @@ -1558,6 +1575,7 @@ impl_writeable_tlv_based!(PaymentCompleteUpdate, { (3, counterparty_node_id, required), (5, channel_id, required), (7, htlc_id, required), + (9, monitor_event_id, option), }); #[derive(Clone, Debug, PartialEq, Eq)] @@ -9993,6 +10011,7 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ next_channel_counterparty_node_id: PublicKey, next_channel_outpoint: OutPoint, next_channel_id: ChannelId, next_user_channel_id: Option, attribution_data: Option, send_timestamp: Option, + monitor_event_id: Option, ) { let startup_replay = !self.background_events_processed_since_startup.load(Ordering::Acquire); @@ -10011,6 +10030,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ channel_funding_outpoint: next_channel_outpoint, channel_id: next_channel_id, htlc_id, + monitor_event_id, }; Some(EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(release)) } else { @@ -12642,6 +12662,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ Some(next_user_channel_id), msg.attribution_data, send_timestamp, + None, ); Ok(()) @@ -13581,6 +13602,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ None, None, None, + Some(event_id), ); } else { log_trace!(logger, "Failing HTLC from our monitor"); @@ -13593,6 +13615,7 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ channel_funding_outpoint: funding_outpoint, channel_id, htlc_id: SentHTLCId::from_source(&htlc_update.source), + monitor_event_id: Some(event_id), }); self.fail_htlc_backwards_internal( &htlc_update.source, @@ -15370,8 +15393,13 @@ impl< channel_funding_outpoint, channel_id, htlc_id, + monitor_event_id, }, ) => { + if let Some(event_id) = monitor_event_id { + self.chain_monitor + .ack_monitor_event(MonitorEventSource { channel_id, event_id }); + } let per_peer_state = self.per_peer_state.read().unwrap(); let mut peer_state_lock = per_peer_state .get(&counterparty_node_id) @@ -19780,6 +19808,7 @@ impl< channel_funding_outpoint: monitor.get_funding_txo(), channel_id: monitor.channel_id(), htlc_id, + monitor_event_id: None, }; let mut compl_action = Some( EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(update) @@ -19859,6 +19888,7 @@ impl< channel_funding_outpoint: monitor.get_funding_txo(), channel_id: monitor.channel_id(), htlc_id: SentHTLCId::from_source(&htlc_source), + monitor_event_id: None, }); failed_htlcs.push(( @@ -20615,6 +20645,7 @@ impl< downstream_user_channel_id, None, None, + None, ); } From 433fdc44b0f91ee5b1b4c1106fa17977af291ad9 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 15:05:54 -0400 Subject: [PATCH 11/28] Fix replay of preimage monitor events for fwds Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. 
Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. If the outbound edge of a forwarded payment is resolved on-chain via preimage, and we get a monitor event for it, we want to mark that monitor event as resolved once the preimage is durably persisted in the inbound edge. Hence, we add the monitor event resolution to the completion of the inbound edge's preimage monitor update. --- lightning/src/ln/channelmanager.rs | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 2332622cf48..5f1a6322d1b 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1475,6 +1475,9 @@ pub(crate) enum MonitorUpdateCompletionAction { EmitEventOptionAndFreeOtherChannel { event: Option, downstream_counterparty_and_funding_outpoint: EventUnblockedChannel, + /// If this action originated from a [`MonitorEvent`], the source to acknowledge once the + /// action is handled. + monitor_event_source: Option, }, /// Indicates we should immediately resume the operation of another channel, unless there is /// some other reason why the channel is blocked.
In practice this simply means immediately @@ -1492,6 +1495,9 @@ pub(crate) enum MonitorUpdateCompletionAction { downstream_counterparty_node_id: PublicKey, blocking_action: RAAMonitorUpdateBlockingAction, downstream_channel_id: ChannelId, + /// If this action originated from a [`MonitorEvent`], the source to acknowledge once the + /// action is handled. + monitor_event_source: Option, }, /// Indicates that one or more [`MonitorEvent`]s should be acknowledged via /// [`chain::Watch::ack_monitor_event`] once the associated [`ChannelMonitorUpdate`] has been @@ -1510,6 +1516,7 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, (0, downstream_counterparty_node_id, required), (4, blocking_action, upgradable_required), (5, downstream_channel_id, required), + (7, monitor_event_source, option), }, (2, EmitEventOptionAndFreeOtherChannel) => { // LDK prior to 0.3 required this field. It will not be present for trampoline payments that // are in the process of being resolved.
(0, event, upgradable_option), (1, downstream_counterparty_and_funding_outpoint, upgradable_required), + (3, monitor_event_source, option), }, (3, AckMonitorEvents) => { (0, monitor_events_to_ack, required_vec), @@ -9563,6 +9571,7 @@ impl< startup_replay: bool, next_channel_counterparty_node_id: PublicKey, next_channel_outpoint: OutPoint, next_channel_id: ChannelId, hop_data: HTLCPreviousHopData, attribution_data: Option, send_timestamp: Option, + monitor_event_source: Option, ) { let _prev_channel_id = hop_data.channel_id; let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data); @@ -9652,6 +9661,7 @@ impl< downstream_counterparty_node_id: chan_to_release.counterparty_node_id, downstream_channel_id: chan_to_release.channel_id, blocking_action: chan_to_release.blocking_action, + monitor_event_source, }), None, ) @@ -9667,6 +9677,7 @@ impl< Some(MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel { event, downstream_counterparty_and_funding_outpoint: chan_to_release, + monitor_event_source, }), None, ) @@ -9851,8 +9862,12 @@ impl< downstream_counterparty_node_id: node_id, blocking_action: blocker, downstream_channel_id: channel_id, + monitor_event_source, } = action { + if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } if let Some(peer_state_mtx) = per_peer_state.get(&node_id) { let mut peer_state = peer_state_mtx.lock().unwrap(); if let Some(blockers) = peer_state @@ -10111,6 +10126,8 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ hop_data, attribution_data, send_timestamp, + monitor_event_id + .map(|id| MonitorEventSource { event_id: id, channel_id: next_channel_id }), ); }, HTLCSource::TrampolineForward { previous_hop_data, .. } => { @@ -10154,6 +10171,22 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ current_previous_hop_data, attribution_data.clone(), send_timestamp, + // Only pass the monitor event ID for the first hop. Once one inbound channel durably + // has the preimage, the outbound monitor event can be acked. The preimage remains in + // the outbound monitor's storage and will be replayed to all remaining inbound hops on + // restart via pending_claims_to_replay. + // + // Note that we plan to move away from this restart replay in the future, and instead + // on restart push temporary monitor events, reusing existing MonitorEvent handling + // logic to do the multi-claim. + if i == 0 { + monitor_event_id.map(|id| MonitorEventSource { + event_id: id, + channel_id: next_channel_id, + }) + } else { + None + }, ); } }, @@ -10380,7 +10413,11 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel { event, downstream_counterparty_and_funding_outpoint, + monitor_event_source, } => { + if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } if let Some(event) = event { self.pending_events.lock().unwrap().push_back((event, None)); } @@ -10394,7 +10431,11 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ downstream_counterparty_node_id, downstream_channel_id, blocking_action, + monitor_event_source, } => { + if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } self.handle_monitor_update_release( downstream_counterparty_node_id, downstream_channel_id, From 370822fc723ce2aec6ac6bdefeaf2e61c01b21ca Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 16:03:46 -0400 Subject: [PATCH 12/28] Track monitor event id in HTLCForwardInfo::Fail* Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. To ensure we can resolve HTLC monitor events for forward failures, we need to pipe the event id through the HTLC failure pipeline, starting from when we first handle the monitor event and call fail_htlc_backwards. To get it from there to the next stage, here we store the monitor event id in the manager's HTLCForwardInfo::Fail*. 
In upcoming commits we will eventually get to the point of acking the monitor event when the HTLC is irrevocably removed from the inbound edge via monitor update. --- lightning/src/ln/channelmanager.rs | 61 +++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 5f1a6322d1b..5512bc08a98 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -497,8 +497,21 @@ impl PendingAddHTLCInfo { #[cfg_attr(test, derive(Clone, Debug, PartialEq))] pub(super) enum HTLCForwardInfo { AddHTLC(PendingAddHTLCInfo), - FailHTLC { htlc_id: u64, err_packet: msgs::OnionErrorPacket }, - FailMalformedHTLC { htlc_id: u64, failure_code: u16, sha256_of_onion: [u8; 32] }, + FailHTLC { + htlc_id: u64, + err_packet: msgs::OnionErrorPacket, + /// A pointer to a [`MonitorEvent`] that can be removed from the outbound edge of this failed + /// forward, once the failure is durably persisted on the inbound edge. + monitor_event_source: Option, + }, + FailMalformedHTLC { + htlc_id: u64, + failure_code: u16, + sha256_of_onion: [u8; 32], + /// A pointer to a [`MonitorEvent`] that can be removed from the outbound edge of this failed + /// forward, once the failure is durably persisted on the inbound edge. 
+ monitor_event_source: Option, + }, } /// Whether this blinded HTLC is being failed backwards by the introduction node or a blinded node, @@ -7654,12 +7667,14 @@ impl< HTLCFailureMsg::Relay(fail_htlc) => HTLCForwardInfo::FailHTLC { htlc_id: fail_htlc.htlc_id, err_packet: fail_htlc.into(), + monitor_event_source: None, }, HTLCFailureMsg::Malformed(fail_malformed_htlc) => { HTLCForwardInfo::FailMalformedHTLC { htlc_id: fail_malformed_htlc.htlc_id, sha256_of_onion: fail_malformed_htlc.sha256_of_onion, failure_code: fail_malformed_htlc.failure_code.into(), + monitor_event_source: None, } }, }; @@ -8189,7 +8204,7 @@ impl< } None }, - HTLCForwardInfo::FailHTLC { htlc_id, ref err_packet } => { + HTLCForwardInfo::FailHTLC { htlc_id, ref err_packet, .. } => { if let Some(chan) = peer_state .channel_by_id .get_mut(&forward_chan_id) @@ -8209,7 +8224,12 @@ impl< break; } }, - HTLCForwardInfo::FailMalformedHTLC { htlc_id, failure_code, sha256_of_onion } => { + HTLCForwardInfo::FailMalformedHTLC { + htlc_id, + failure_code, + sha256_of_onion, + .. + } => { if let Some(chan) = peer_state .channel_by_id .get_mut(&forward_chan_id) @@ -9247,6 +9267,10 @@ impl< onion_error ); + let monitor_event_source = from_monitor_update_completion.as_ref().and_then(|u| { + u.monitor_event_id + .map(|id| MonitorEventSource { event_id: id, channel_id: u.channel_id }) + }); push_forward_htlcs_failure( *prev_outbound_scid_alias, get_htlc_forward_failure( @@ -9256,6 +9280,7 @@ impl< trampoline_shared_secret, phantom_shared_secret, *htlc_id, + monitor_event_source, ), ); @@ -9291,7 +9316,12 @@ impl< // necessarily want to fail all of our incoming HTLCs back yet. We may have other // outgoing HTLCs that need to resolve first. This will be tracked in our // pending_outbound_payments in a followup. 
- for current_hop_data in previous_hop_data { + let trampoline_monitor_event_source = + from_monitor_update_completion.as_ref().and_then(|u| { + u.monitor_event_id + .map(|id| MonitorEventSource { event_id: id, channel_id: u.channel_id }) + }); + for (i, current_hop_data) in previous_hop_data.iter().enumerate() { let HTLCPreviousHopData { prev_outbound_scid_alias, htlc_id, @@ -9318,6 +9348,7 @@ impl< &incoming_trampoline_shared_secret, &None, *htlc_id, + if i == 0 { trampoline_monitor_event_source } else { None }, ), ); } @@ -14318,6 +14349,7 @@ fn get_htlc_forward_failure( blinded_failure: &Option, onion_error: &HTLCFailReason, incoming_packet_shared_secret: &[u8; 32], trampoline_shared_secret: &Option<[u8; 32]>, phantom_shared_secret: &Option<[u8; 32]>, htlc_id: u64, + monitor_event_source: Option, ) -> HTLCForwardInfo { // TODO: Correctly wrap the error packet twice if failing back a trampoline + phantom HTLC. let secondary_shared_secret = trampoline_shared_secret.or(*phantom_shared_secret); @@ -14329,19 +14361,20 @@ fn get_htlc_forward_failure( incoming_packet_shared_secret, &secondary_shared_secret, ); - HTLCForwardInfo::FailHTLC { htlc_id, err_packet } + HTLCForwardInfo::FailHTLC { htlc_id, err_packet, monitor_event_source } }, Some(BlindedFailure::FromBlindedNode) => HTLCForwardInfo::FailMalformedHTLC { htlc_id, failure_code: LocalHTLCFailureReason::InvalidOnionBlinding.failure_code(), sha256_of_onion: [0; 32], + monitor_event_source, }, None => { let err_packet = onion_error.get_encrypted_failure_packet( incoming_packet_shared_secret, &secondary_shared_secret, ); - HTLCForwardInfo::FailHTLC { htlc_id, err_packet } + HTLCForwardInfo::FailHTLC { htlc_id, err_packet, monitor_event_source } }, } } @@ -18012,15 +18045,21 @@ impl Writeable for HTLCForwardInfo { 0u8.write(w)?; info.write(w)?; }, - Self::FailHTLC { htlc_id, err_packet } => { + Self::FailHTLC { htlc_id, err_packet, monitor_event_source } => { FAIL_HTLC_VARIANT_ID.write(w)?; write_tlv_fields!(w, { 
(0, htlc_id, required), (2, err_packet.data, required), (5, err_packet.attribution_data, option), + (7, monitor_event_source, option), }); }, - Self::FailMalformedHTLC { htlc_id, failure_code, sha256_of_onion } => { + Self::FailMalformedHTLC { + htlc_id, + failure_code, + sha256_of_onion, + monitor_event_source, + } => { // Since this variant was added in 0.0.119, write this as `::FailHTLC` with an empty error // packet so older versions have something to fail back with, but serialize the real data as // optional TLVs for the benefit of newer versions. @@ -18030,6 +18069,7 @@ impl Writeable for HTLCForwardInfo { (1, failure_code, required), (2, Vec::::new(), required), (3, sha256_of_onion, required), + (7, monitor_event_source, option), }); }, } @@ -18050,6 +18090,7 @@ impl Readable for HTLCForwardInfo { (2, err_packet, required), (3, sha256_of_onion, option), (5, attribution_data, option), + (7, monitor_event_source, option), }); if let Some(failure_code) = malformed_htlc_failure_code { if attribution_data.is_some() { @@ -18059,6 +18100,7 @@ impl Readable for HTLCForwardInfo { htlc_id: _init_tlv_based_struct_field!(htlc_id, required), failure_code, sha256_of_onion: sha256_of_onion.ok_or(DecodeError::InvalidValue)?, + monitor_event_source, } } else { Self::FailHTLC { @@ -18067,6 +18109,7 @@ impl Readable for HTLCForwardInfo { data: _init_tlv_based_struct_field!(err_packet, required), attribution_data: _init_tlv_based_struct_field!(attribution_data, option), }, + monitor_event_source, } } }, From ea5836bd45c4299d8ddeb9d7af234a3002db5762 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 16:29:47 -0400 Subject: [PATCH 13/28] Add monitor_event_source to holding cell HTLC fails Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. 
This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. To ensure we can resolve HTLC monitor events for forward failures, we need to pipe the event id through the HTLC failure pipeline. We started this process in the previous commit, and here we continue by storing the monitor event id in the Channel's holding cell HTLC failures. In upcoming commits we will eventually get to the point of acking the monitor event when the HTLC is irrevocably removed from the inbound edge via monitor update. 
--- lightning/src/ln/channel.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 03f78dc82b4..445f9f8dd77 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -30,6 +30,7 @@ use crate::blinded_path::message::BlindedMessagePath; use crate::chain::chaininterface::{ ConfirmationTarget, FeeEstimator, LowerBoundedFeeEstimator, TransactionType, }; +use crate::chain::chainmonitor::MonitorEventSource; use crate::chain::channelmonitor::{ ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateStep, CommitmentHTLCData, LATENCY_GRACE_PERIOD_BLOCKS, @@ -551,11 +552,21 @@ enum HTLCUpdateAwaitingACK { FailHTLC { htlc_id: u64, err_packet: msgs::OnionErrorPacket, + /// Contains a pointer to a [`MonitorEvent`] that can be removed from the outbound edge of this + /// failed forward, once the failure is durably persisted in this channel (the inbound edge). + /// + /// [`MonitorEvent`]: crate::chain::channelmonitor::MonitorEvent + monitor_event_source: Option, }, FailMalformedHTLC { htlc_id: u64, failure_code: u16, sha256_of_onion: [u8; 32], + /// Contains a pointer to a [`MonitorEvent`] that can be removed from the outbound edge of this + /// failed forward, once the failure is durably persisted in this channel (the inbound edge). 
+ /// + /// [`MonitorEvent`]: crate::chain::channelmonitor::MonitorEvent + monitor_event_source: Option, }, } @@ -6887,7 +6898,7 @@ impl FailHTLCContents for msgs::OnionErrorPacket { InboundHTLCState::LocalRemoved(InboundHTLCRemovalReason::FailRelay(self)) } fn to_htlc_update_awaiting_ack(self, htlc_id: u64) -> HTLCUpdateAwaitingACK { - HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet: self } + HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet: self, monitor_event_source: None } } } impl FailHTLCContents for ([u8; 32], u16) { @@ -6911,6 +6922,7 @@ impl FailHTLCContents for ([u8; 32], u16) { htlc_id, sha256_of_onion: self.0, failure_code: self.1, + monitor_event_source: None, } } } @@ -8780,7 +8792,7 @@ where monitor_update.updates.append(&mut additional_monitor_update.updates); None }, - &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => Some( + &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet, .. } => Some( self.fail_htlc(htlc_id, err_packet.clone(), false, logger) .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), ), @@ -8788,6 +8800,7 @@ where htlc_id, failure_code, sha256_of_onion, + .. } => Some( self.fail_htlc(htlc_id, (sha256_of_onion, failure_code), false, logger) .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), @@ -15549,7 +15562,7 @@ impl Writeable for FundedChannel { // Store the attribution data for later writing. holding_cell_attribution_data.push(attribution_data.as_ref()); }, - &HTLCUpdateAwaitingACK::FailHTLC { ref htlc_id, ref err_packet } => { + &HTLCUpdateAwaitingACK::FailHTLC { ref htlc_id, ref err_packet, .. } => { 2u8.write(writer)?; htlc_id.write(writer)?; err_packet.data.write(writer)?; @@ -15561,6 +15574,7 @@ impl Writeable for FundedChannel { htlc_id, failure_code, sha256_of_onion, + .. } => { // We don't want to break downgrading by adding a new variant, so write a dummy // `::FailHTLC` variant and write the real malformed error as an optional TLV. 
@@ -16006,6 +16020,7 @@ impl<'a, 'b, 'c, ES: EntropySource, SP: SignerProvider> data: Readable::read(reader)?, attribution_data: None, }, + monitor_event_source: None, }, _ => return Err(DecodeError::InvalidValue), }); @@ -16462,7 +16477,7 @@ impl<'a, 'b, 'c, ES: EntropySource, SP: SignerProvider> let htlc_idx = holding_cell_htlc_updates .iter() .position(|htlc| { - if let HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet } = htlc { + if let HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet, .. } = htlc { let matches = *htlc_id == malformed_htlc_id; if matches { debug_assert!(err_packet.data.is_empty()) @@ -16477,6 +16492,7 @@ impl<'a, 'b, 'c, ES: EntropySource, SP: SignerProvider> htlc_id: malformed_htlc_id, failure_code, sha256_of_onion, + monitor_event_source: None, }; let _ = core::mem::replace(&mut holding_cell_htlc_updates[htlc_idx], malformed_htlc); @@ -17504,12 +17520,14 @@ mod tests { |htlc_id, attribution_data| HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet: msgs::OnionErrorPacket { data: vec![42], attribution_data }, + monitor_event_source: None, }; let dummy_holding_cell_malformed_htlc = |htlc_id| HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, failure_code: LocalHTLCFailureReason::InvalidOnionBlinding.failure_code(), sha256_of_onion: [0; 32], + monitor_event_source: None, }; let mut holding_cell_htlc_updates = Vec::with_capacity(12); for i in 0..16 { From 786a3351834ff760065c6da789e97e583685cd02 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 16:37:55 -0400 Subject: [PATCH 14/28] Pipe monitor event source HTLCForwardInfo -> holding cell htlcs Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. 
Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. To ensure we can resolve HTLC monitor events for forward failures, we need to pipe the event id through the HTLC failure pipeline. Here we pipe the monitor event source from the manager to the channel, so it can be put in the channel's holding cell. In upcoming commits we will eventually get to the point of acking the monitor event when the HTLC is irrevocably removed from the inbound edge via monitor update. 
--- lightning/src/ln/channel.rs | 79 +++++++++++++++++++++--------- lightning/src/ln/channelmanager.rs | 15 ++++-- 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 445f9f8dd77..92fd8c0ca43 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -6882,7 +6882,9 @@ trait FailHTLCContents { type Message: FailHTLCMessageName; fn to_message(self, htlc_id: u64, channel_id: ChannelId) -> Self::Message; fn to_inbound_htlc_state(self) -> InboundHTLCState; - fn to_htlc_update_awaiting_ack(self, htlc_id: u64) -> HTLCUpdateAwaitingACK; + fn to_htlc_update_awaiting_ack( + self, htlc_id: u64, monitor_event_source: Option, + ) -> HTLCUpdateAwaitingACK; } impl FailHTLCContents for msgs::OnionErrorPacket { type Message = msgs::UpdateFailHTLC; @@ -6897,8 +6899,10 @@ impl FailHTLCContents for msgs::OnionErrorPacket { fn to_inbound_htlc_state(self) -> InboundHTLCState { InboundHTLCState::LocalRemoved(InboundHTLCRemovalReason::FailRelay(self)) } - fn to_htlc_update_awaiting_ack(self, htlc_id: u64) -> HTLCUpdateAwaitingACK { - HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet: self, monitor_event_source: None } + fn to_htlc_update_awaiting_ack( + self, htlc_id: u64, monitor_event_source: Option, + ) -> HTLCUpdateAwaitingACK { + HTLCUpdateAwaitingACK::FailHTLC { htlc_id, err_packet: self, monitor_event_source } } } impl FailHTLCContents for ([u8; 32], u16) { @@ -6917,12 +6921,14 @@ impl FailHTLCContents for ([u8; 32], u16) { failure_code: self.1, }) } - fn to_htlc_update_awaiting_ack(self, htlc_id: u64) -> HTLCUpdateAwaitingACK { + fn to_htlc_update_awaiting_ack( + self, htlc_id: u64, monitor_event_source: Option, + ) -> HTLCUpdateAwaitingACK { HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, sha256_of_onion: self.0, failure_code: self.1, - monitor_event_source: None, + monitor_event_source, } } } @@ -7686,9 +7692,10 @@ where /// Returns `Err` (always with [`ChannelError::Ignore`]) 
if the HTLC could not be failed (e.g. /// if it was already resolved). Otherwise returns `Ok`. pub fn queue_fail_htlc( - &mut self, htlc_id_arg: u64, err_packet: msgs::OnionErrorPacket, logger: &L, + &mut self, htlc_id_arg: u64, err_packet: msgs::OnionErrorPacket, + monitor_event_source: Option, logger: &L, ) -> Result<(), ChannelError> { - self.fail_htlc(htlc_id_arg, err_packet, true, logger) + self.fail_htlc(htlc_id_arg, err_packet, true, monitor_event_source, logger) .map(|msg_opt| assert!(msg_opt.is_none(), "We forced holding cell?")) } @@ -7697,10 +7704,17 @@ where /// /// See [`Self::queue_fail_htlc`] for more info. pub fn queue_fail_malformed_htlc( - &mut self, htlc_id_arg: u64, failure_code: u16, sha256_of_onion: [u8; 32], logger: &L, + &mut self, htlc_id_arg: u64, failure_code: u16, sha256_of_onion: [u8; 32], + monitor_event_source: Option, logger: &L, ) -> Result<(), ChannelError> { - self.fail_htlc(htlc_id_arg, (sha256_of_onion, failure_code), true, logger) - .map(|msg_opt| assert!(msg_opt.is_none(), "We forced holding cell?")) + self.fail_htlc( + htlc_id_arg, + (sha256_of_onion, failure_code), + true, + monitor_event_source, + logger, + ) + .map(|msg_opt| assert!(msg_opt.is_none(), "We forced holding cell?")) } /// Returns `Err` (always with [`ChannelError::Ignore`]) if the HTLC could not be failed (e.g. 
@@ -7708,7 +7722,7 @@ where #[rustfmt::skip] fn fail_htlc( &mut self, htlc_id_arg: u64, err_contents: E, mut force_holding_cell: bool, - logger: &L + monitor_event_source: Option, logger: &L ) -> Result, ChannelError> { if !matches!(self.context.channel_state, ChannelState::ChannelReady(_)) { panic!("Was asked to fail an HTLC when channel was not in an operational state"); @@ -7745,17 +7759,22 @@ where // Now update local state: if force_holding_cell { - for pending_update in self.context.holding_cell_htlc_updates.iter() { + for pending_update in self.context.holding_cell_htlc_updates.iter_mut() { match pending_update { - &HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => { + &mut HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => { if htlc_id_arg == htlc_id { return Err(ChannelError::Ignore(format!("HTLC {} was already claimed!", htlc_id))); } }, - &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } | - &HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, .. } => + &mut HTLCUpdateAwaitingACK::FailHTLC { htlc_id, monitor_event_source: ref mut src, .. } | + &mut HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, monitor_event_source: ref mut src, .. } => { if htlc_id_arg == htlc_id { + if src.is_none() { + // If the monitor event source is unset, may as well set it here. Currently + // unreachable. + *src = monitor_event_source; + } return Err(ChannelError::Ignore(format!("HTLC {} was already pending failure", htlc_id))); } }, @@ -7763,7 +7782,7 @@ where } } log_trace!(logger, "Placing failure for HTLC ID {} in holding cell.", htlc_id_arg); - self.context.holding_cell_htlc_updates.push(err_contents.to_htlc_update_awaiting_ack(htlc_id_arg)); + self.context.holding_cell_htlc_updates.push(err_contents.to_htlc_update_awaiting_ack(htlc_id_arg, monitor_event_source)); return Ok(None); } @@ -8792,18 +8811,34 @@ where monitor_update.updates.append(&mut additional_monitor_update.updates); None }, - &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet, .. 
} => Some( - self.fail_htlc(htlc_id, err_packet.clone(), false, logger) - .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), + &HTLCUpdateAwaitingACK::FailHTLC { + htlc_id, + ref err_packet, + monitor_event_source, + } => Some( + self.fail_htlc( + htlc_id, + err_packet.clone(), + false, + monitor_event_source, + logger, + ) + .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), ), &HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, failure_code, sha256_of_onion, - .. + monitor_event_source, } => Some( - self.fail_htlc(htlc_id, (sha256_of_onion, failure_code), false, logger) - .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), + self.fail_htlc( + htlc_id, + (sha256_of_onion, failure_code), + false, + monitor_event_source, + logger, + ) + .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), ), }; if let Some(res) = fail_htlc_res { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 5512bc08a98..9455428bf2e 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -8204,7 +8204,7 @@ impl< } None }, - HTLCForwardInfo::FailHTLC { htlc_id, ref err_packet, .. } => { + HTLCForwardInfo::FailHTLC { htlc_id, ref err_packet, monitor_event_source } => { if let Some(chan) = peer_state .channel_by_id .get_mut(&forward_chan_id) @@ -8212,7 +8212,15 @@ impl< { let logger = WithChannelContext::from(&self.logger, &chan.context, None); log_trace!(logger, "Failing HTLC back to channel with short id {} (backward HTLC ID {}) after delay", short_chan_id, htlc_id); - Some((chan.queue_fail_htlc(htlc_id, err_packet.clone(), &&logger), htlc_id)) + Some(( + chan.queue_fail_htlc( + htlc_id, + err_packet.clone(), + monitor_event_source, + &&logger, + ), + htlc_id, + )) } else { self.forwarding_channel_not_found( core::iter::once(forward_info).chain(draining_pending_forwards), @@ -8228,7 +8236,7 @@ impl< htlc_id, failure_code, sha256_of_onion, - .. 
+ monitor_event_source, } => { if let Some(chan) = peer_state .channel_by_id @@ -8241,6 +8249,7 @@ impl< htlc_id, failure_code, sha256_of_onion, + monitor_event_source, &&logger, ); Some((res, htlc_id)) From 0326235750ad1c4ea84a9a212244b0fe0e323907 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 16 Mar 2026 17:40:44 -0400 Subject: [PATCH 15/28] Ack monitor events on check_free_peer_holding_cells Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we build on recent commits by ACK'ing monitor events for forward failures once the monitor update that marks them as failed on the inbound edge is complete. --- lightning/src/ln/channel.rs | 62 +++++++++++++++++++----------- lightning/src/ln/channelmanager.rs | 11 +++++- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 92fd8c0ca43..b8a7e2f3c71 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -8676,7 +8676,7 @@ where /// returns `(None, Vec::new())`. 
pub fn maybe_free_holding_cell_htlcs( &mut self, fee_estimator: &LowerBoundedFeeEstimator, logger: &L, - ) -> (Option, Vec<(HTLCSource, PaymentHash)>) { + ) -> (Option<(ChannelMonitorUpdate, Vec)>, Vec<(HTLCSource, PaymentHash)>) { if matches!(self.context.channel_state, ChannelState::ChannelReady(_)) && self.context.channel_state.can_generate_new_commitment() { @@ -8690,7 +8690,7 @@ where /// for our counterparty. fn free_holding_cell_htlcs( &mut self, fee_estimator: &LowerBoundedFeeEstimator, logger: &L, - ) -> (Option, Vec<(HTLCSource, PaymentHash)>) { + ) -> (Option<(ChannelMonitorUpdate, Vec)>, Vec<(HTLCSource, PaymentHash)>) { assert!(matches!(self.context.channel_state, ChannelState::ChannelReady(_))); assert!(!self.context.channel_state.is_monitor_update_in_progress()); assert!(!self.context.channel_state.is_quiescent()); @@ -8720,6 +8720,7 @@ where let mut update_fulfill_count = 0; let mut update_fail_count = 0; let mut htlcs_to_fail = Vec::new(); + let mut monitor_events_to_ack = Vec::new(); for htlc_update in htlc_updates.drain(..) 
{ // Note that this *can* fail, though it should be due to rather-rare conditions on // fee races with adding too many outputs which push our total payments just over @@ -8815,31 +8816,41 @@ where htlc_id, ref err_packet, monitor_event_source, - } => Some( - self.fail_htlc( - htlc_id, - err_packet.clone(), - false, - monitor_event_source, - logger, + } => { + if let Some(source) = monitor_event_source { + monitor_events_to_ack.push(source); + } + Some( + self.fail_htlc( + htlc_id, + err_packet.clone(), + false, + monitor_event_source, + logger, + ) + .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), ) - .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), - ), + }, &HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, failure_code, sha256_of_onion, monitor_event_source, - } => Some( - self.fail_htlc( - htlc_id, - (sha256_of_onion, failure_code), - false, - monitor_event_source, - logger, + } => { + if let Some(source) = monitor_event_source { + monitor_events_to_ack.push(source); + } + Some( + self.fail_htlc( + htlc_id, + (sha256_of_onion, failure_code), + false, + monitor_event_source, + logger, + ) + .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), ) - .map(|fail_msg_opt| fail_msg_opt.map(|_| ())), - ), + }, }; if let Some(res) = fail_htlc_res { match res { @@ -8891,7 +8902,11 @@ where Vec::new(), logger, ); - (self.push_ret_blockable_mon_update(monitor_update), htlcs_to_fail) + ( + self.push_ret_blockable_mon_update(monitor_update) + .map(|upd| (upd, monitor_events_to_ack)), + htlcs_to_fail, + ) } else { (None, Vec::new()) } @@ -9244,7 +9259,10 @@ where self.context.monitor_pending_update_adds.append(&mut pending_update_adds); match self.maybe_free_holding_cell_htlcs(fee_estimator, logger) { - (Some(mut additional_update), htlcs_to_fail) => { + // TODO: Thread monitor_events_to_ack through the revoke_and_ack return + // value so the ChannelManager can attach an AckMonitorEvents completion + // action to this monitor update. 
+ (Some((mut additional_update, _monitor_events_to_ack)), htlcs_to_fail) => { // free_holding_cell_htlcs may bump latest_monitor_id multiple times but we want them to be // strictly increasing by one, so decrement it here. self.context.latest_monitor_update_id = monitor_update.update_id; diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 9455428bf2e..bc99c3c022b 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13827,7 +13827,16 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ ); if monitor_opt.is_some() || !holding_cell_failed_htlcs.is_empty() { let update_res = monitor_opt - .map(|monitor_update| { + .map(|(monitor_update, monitor_events_to_ack)| { + if !monitor_events_to_ack.is_empty() { + peer_state + .monitor_update_blocked_actions + .entry(*chan_id) + .or_default() + .push(MonitorUpdateCompletionAction::AckMonitorEvents { + monitor_events_to_ack, + }); + } self.handle_new_monitor_update( &mut peer_state.in_flight_monitor_updates, &mut peer_state.monitor_update_blocked_actions, From c759e7746868e8c7b7fbddfbb0668930997805de Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 10:17:48 -0400 Subject: [PATCH 16/28] Ack monitor events on revoke_and_ack Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). 
In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we build on recent commits by ACK'ing monitor events for forward failures once the monitor update that marks them as failed on the inbound edge is complete, when the HTLC was irrevocably failed via revoke_and_ack. --- lightning/src/ln/channel.rs | 16 ++++++++++------ lightning/src/ln/channelmanager.rs | 11 ++++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index b8a7e2f3c71..78a2307ef05 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -8932,7 +8932,7 @@ where ( Vec<(HTLCSource, PaymentHash)>, Vec<(StaticInvoice, BlindedMessagePath)>, - Option, + Option<(ChannelMonitorUpdate, Vec)>, ), ChannelError, > { @@ -9243,6 +9243,7 @@ where } else { "Blocked" }; + let mut monitor_events_to_ack = Vec::new(); macro_rules! return_with_htlcs_to_fail { ($htlcs_to_fail: expr) => { if !release_monitor { @@ -9251,7 +9252,12 @@ where .push(PendingChannelMonitorUpdate { update: monitor_update }); return Ok(($htlcs_to_fail, static_invoices, None)); } else { - return Ok(($htlcs_to_fail, static_invoices, Some(monitor_update))); + let events_to_ack = core::mem::take(&mut monitor_events_to_ack); + return Ok(( + $htlcs_to_fail, + static_invoices, + Some((monitor_update, events_to_ack)), + )); } }; } @@ -9259,10 +9265,8 @@ where self.context.monitor_pending_update_adds.append(&mut pending_update_adds); match self.maybe_free_holding_cell_htlcs(fee_estimator, logger) { - // TODO: Thread monitor_events_to_ack through the revoke_and_ack return - // value so the ChannelManager can attach an AckMonitorEvents completion - // action to this monitor update. 
- (Some((mut additional_update, _monitor_events_to_ack)), htlcs_to_fail) => { + (Some((mut additional_update, holding_cell_monitor_events_to_ack)), htlcs_to_fail) => { + monitor_events_to_ack = holding_cell_monitor_events_to_ack; // free_holding_cell_htlcs may bump latest_monitor_id multiple times but we want them to be // strictly increasing by one, so decrement it here. self.context.latest_monitor_update_id = monitor_update.update_id; diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index bc99c3c022b..70390cf5ab6 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13012,7 +13012,16 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ *counterparty_node_id); let (htlcs_to_fail, static_invoices, monitor_update_opt) = try_channel_entry!(self, peer_state, chan.revoke_and_ack(&msg, &self.fee_estimator, &&logger, mon_update_blocked), chan_entry); - if let Some(monitor_update) = monitor_update_opt { + if let Some((monitor_update, monitor_events_to_ack)) = monitor_update_opt { + if !monitor_events_to_ack.is_empty() { + peer_state + .monitor_update_blocked_actions + .entry(msg.channel_id) + .or_default() + .push(MonitorUpdateCompletionAction::AckMonitorEvents { + monitor_events_to_ack, + }); + } let funding_txo = funding_txo_opt .expect("Funding outpoint must have been set for RAA handling to succeed"); if let Some(data) = self.handle_new_monitor_update( From 8c95410f489b8c6700f8b5927e314b024746839c Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 10:21:44 -0400 Subject: [PATCH 17/28] Ack monitor event when inbound edge is closed Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. 
Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we build on recent commits by ACK'ing monitor events for forward failures if we try to fail backwards and discover the inbound edge is closed. If that's the case, there's nothing for us to do as the HTLC will be resolved via timeout by the inbound edge counterparty. --- lightning/src/ln/channelmanager.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 70390cf5ab6..3294ed0e673 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -7999,11 +7999,15 @@ impl< continue; } }, - HTLCForwardInfo::FailHTLC { .. } | HTLCForwardInfo::FailMalformedHTLC { .. } => { + HTLCForwardInfo::FailHTLC { monitor_event_source, .. } + | HTLCForwardInfo::FailMalformedHTLC { monitor_event_source, .. } => { // Channel went away before we could fail it. This implies // the channel is now on chain and our counterparty is // trying to broadcast the HTLC-Timeout, but that's their // problem, not ours. 
+ if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } }, } } From 2a93da104b90a6a8bb9e2c44733dd229d39b1e2b Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 10:30:12 -0400 Subject: [PATCH 18/28] Ack monitor events when failing-to-fail-backwards Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we build on recent commits by ACK'ing monitor events for forward failures if we go to fail on the inbound edge and get an error when queueing the backwards failure. If we get an error in this case it means the failure is a duplicate or the channel is closed, and in either case it's fine to mark the event as resolved. 
--- lightning/src/ln/channelmanager.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 3294ed0e673..80c63d77c3b 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -8224,6 +8224,7 @@ impl< &&logger, ), htlc_id, + monitor_event_source, )) } else { self.forwarding_channel_not_found( @@ -8256,7 +8257,7 @@ impl< monitor_event_source, &&logger, ); - Some((res, htlc_id)) + Some((res, htlc_id, monitor_event_source)) } else { self.forwarding_channel_not_found( core::iter::once(forward_info).chain(draining_pending_forwards), @@ -8269,8 +8270,12 @@ impl< } }, }; - if let Some((queue_fail_htlc_res, htlc_id)) = queue_fail_htlc_res { + if let Some((queue_fail_htlc_res, htlc_id, monitor_event_source)) = queue_fail_htlc_res + { if let Err(e) = queue_fail_htlc_res { + if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } if let ChannelError::Ignore(msg) = e { if let Some(chan) = peer_state .channel_by_id From 27fa5a1edbf40a40f767f3ecff484a107f8e917e Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 23 Mar 2026 15:39:33 -0400 Subject: [PATCH 19/28] Ack monitor events from blocked updates in Channel Previously, we would drop the list of monitor events that could be ack'd after a ChannelMonitorUpdate completed, if that monitor update was generated within the Channel and ended up in the Channel's blocked-updates queue. 
--- lightning/src/ln/channel.rs | 53 ++++++++++++++++++++++-------- lightning/src/ln/channelmanager.rs | 23 ++++++++++--- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 78a2307ef05..4839424d38c 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -1485,10 +1485,13 @@ pub(crate) const CHANNEL_ANNOUNCEMENT_PROPAGATION_DELAY: u32 = 144; #[derive(Debug)] struct PendingChannelMonitorUpdate { update: ChannelMonitorUpdate, + /// `MonitorEvent`s that can be ack'd after `update` is durably persisted. + post_update_ackable_events: Vec, } impl_writeable_tlv_based!(PendingChannelMonitorUpdate, { (0, update, required), + (1, post_update_ackable_events, optional_vec), }); /// A payment channel with a counterparty throughout its life-cycle, encapsulating negotiation and @@ -7668,9 +7671,10 @@ where if !update_blocked { debug_assert!(false, "If there is a pending blocked monitor we should have MonitorUpdateInProgress set"); let update = self.build_commitment_no_status_check(logger); - self.context - .blocked_monitor_updates - .push(PendingChannelMonitorUpdate { update }); + self.context.blocked_monitor_updates.push(PendingChannelMonitorUpdate { + update, + post_update_ackable_events: Vec::new(), + }); } } @@ -8903,8 +8907,10 @@ where logger, ); ( - self.push_ret_blockable_mon_update(monitor_update) - .map(|upd| (upd, monitor_events_to_ack)), + self.push_ret_blockable_mon_update_with_event_sources( + monitor_update, + monitor_events_to_ack, + ), htlcs_to_fail, ) } else { @@ -9246,13 +9252,14 @@ where let mut monitor_events_to_ack = Vec::new(); macro_rules! 
return_with_htlcs_to_fail { ($htlcs_to_fail: expr) => { + let events_to_ack = core::mem::take(&mut monitor_events_to_ack); if !release_monitor { - self.context - .blocked_monitor_updates - .push(PendingChannelMonitorUpdate { update: monitor_update }); + self.context.blocked_monitor_updates.push(PendingChannelMonitorUpdate { + update: monitor_update, + post_update_ackable_events: events_to_ack, + }); return Ok(($htlcs_to_fail, static_invoices, None)); } else { - let events_to_ack = core::mem::take(&mut monitor_events_to_ack); return Ok(( $htlcs_to_fail, static_invoices, @@ -11346,12 +11353,16 @@ where /// Returns the next blocked monitor update, if one exists, and a bool which indicates a /// further blocked monitor update exists after the next. - pub fn unblock_next_blocked_monitor_update(&mut self) -> Option<(ChannelMonitorUpdate, bool)> { + pub fn unblock_next_blocked_monitor_update( + &mut self, + ) -> Option<(ChannelMonitorUpdate, Vec, bool)> { if self.context.blocked_monitor_updates.is_empty() { return None; } + let pending = self.context.blocked_monitor_updates.remove(0); Some(( - self.context.blocked_monitor_updates.remove(0).update, + pending.update, + pending.post_update_ackable_events, !self.context.blocked_monitor_updates.is_empty(), )) } @@ -11361,14 +11372,24 @@ where #[rustfmt::skip] fn push_ret_blockable_mon_update(&mut self, update: ChannelMonitorUpdate) -> Option { + self.push_ret_blockable_mon_update_with_event_sources(update, Vec::new()) + .map(|(upd, _)| upd) + } + + /// Similar to `push_ret_blockable_mon_update`, but allows including a list of + /// `MonitorEventSource`s that can be ack'd after the `update` is durably persisted. 
+ fn push_ret_blockable_mon_update_with_event_sources( + &mut self, update: ChannelMonitorUpdate, monitor_event_sources: Vec, + ) -> Option<(ChannelMonitorUpdate, Vec)> { let release_monitor = self.context.blocked_monitor_updates.is_empty(); if !release_monitor { self.context.blocked_monitor_updates.push(PendingChannelMonitorUpdate { update, + post_update_ackable_events: monitor_event_sources, }); None } else { - Some(update) + Some((update, monitor_event_sources)) } } @@ -11377,19 +11398,23 @@ where /// here after logging them. pub fn on_startup_drop_completed_blocked_mon_updates_through( &mut self, logger: &L, loaded_mon_update_id: u64, - ) { - self.context.blocked_monitor_updates.retain(|update| { + ) -> Vec { + let mut monitor_events_to_ack = Vec::new(); + self.context.blocked_monitor_updates.retain_mut(|update| { if update.update.update_id <= loaded_mon_update_id { log_info!( logger, "Dropping completed ChannelMonitorUpdate id {} due to a stale ChannelManager", update.update.update_id, ); + monitor_events_to_ack + .extend(core::mem::take(&mut update.post_update_ackable_events)); false } else { true } }); + monitor_events_to_ack } pub fn blocked_monitor_updates_pending(&self) -> usize { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 80c63d77c3b..fb13cc27716 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -15437,9 +15437,18 @@ impl< channel_id) { if let Some(chan) = chan_entry.get_mut().as_funded_mut() { let channel_funding_outpoint = chan.funding_outpoint(); - if let Some((monitor_update, further_update_exists)) = chan.unblock_next_blocked_monitor_update() { + if let Some((monitor_update, monitor_events_to_ack, further_update_exists)) = chan.unblock_next_blocked_monitor_update() { log_debug!(logger, "Unlocking monitor updating and updating monitor", ); + if !monitor_events_to_ack.is_empty() { + peer_state + .monitor_update_blocked_actions + .entry(channel_id) + .or_default() 
+ .push(MonitorUpdateCompletionAction::AckMonitorEvents { + monitor_events_to_ack, + }); + } let post_update_data = self.handle_new_monitor_update( &mut peer_state.in_flight_monitor_updates, &mut peer_state.monitor_update_blocked_actions, @@ -19320,10 +19329,14 @@ impl< } } } else { - channel.on_startup_drop_completed_blocked_mon_updates_through( - &logger, - monitor.get_latest_update_id(), - ); + let events_to_ack = channel + .on_startup_drop_completed_blocked_mon_updates_through( + &logger, + monitor.get_latest_update_id(), + ); + for source in events_to_ack { + args.chain_monitor.ack_monitor_event(source); + } log_info!(logger, "Successfully loaded at update_id {} against monitor at update id {} with {} blocked updates", channel.context.get_latest_monitor_update_id(), monitor.get_latest_update_id(), channel.blocked_monitor_updates_pending()); From 6ffa153cd10137ea0def7931385b5fc62cad6519 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 17 Mar 2026 15:31:39 -0400 Subject: [PATCH 20/28] Support persistent monitor events Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. 
This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we complete work that was built on recent prior commits and actually start re-providing monitor events on startup if they went un-acked during runtime. This isn't actually supported in prod yet, so this new code will run randomly in tests, to ensure we still support the old paths. --- lightning/src/chain/channelmonitor.rs | 61 +++++++++++++++++++++------ lightning/src/ln/channelmanager.rs | 27 +++++++++++- lightning/src/ln/monitor_tests.rs | 3 ++ lightning/src/util/config.rs | 6 +++ 4 files changed, 84 insertions(+), 13 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index c48512ec228..36c650f8daf 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -1288,6 +1288,11 @@ pub(crate) struct ChannelMonitorImpl { // block/transaction-connected events and *not* during block/transaction-disconnected events, // we further MUST NOT generate events during block/transaction-disconnection. pending_monitor_events: Vec<(u64, MonitorEvent)>, + // `MonitorEvent`s that have been provided to the `ChannelManager` via + // [`ChannelMonitor::get_and_clear_pending_monitor_events`] and are awaiting + // [`ChannelMonitor::ack_monitor_event`] for removal. If an event in this queue is not ack'd, it + // will be re-provided to the `ChannelManager` on startup. + provided_monitor_events: Vec<(u64, MonitorEvent)>, /// When set, monitor events are retained until explicitly acked rather than cleared on read. /// /// Allows the ChannelManager to reconstruct pending HTLC state by replaying monitor events on @@ -1764,7 +1769,12 @@ pub(crate) fn write_chanmon_internal( // Only write `persistent_events_enabled` if it's set to true, as it's an even TLV. 
let persistent_events_enabled = channel_monitor.persistent_events_enabled.then_some(()); let pending_mon_evs_with_ids = if persistent_events_enabled.is_some() { - Some(Iterable(channel_monitor.pending_monitor_events.iter())) + Some(Iterable( + channel_monitor + .provided_monitor_events + .iter() + .chain(channel_monitor.pending_monitor_events.iter()), + )) } else { None }; @@ -1978,6 +1988,7 @@ impl ChannelMonitor { payment_preimages: new_hash_map(), pending_monitor_events: Vec::new(), + provided_monitor_events: Vec::new(), persistent_events_enabled: false, next_monitor_event_id: 0, pending_events: Vec::new(), @@ -2203,8 +2214,15 @@ impl ChannelMonitor { /// Removes a [`MonitorEvent`] by its event ID, acknowledging that it has been processed. /// Generally called by [`chain::Watch::ack_monitor_event`]. - pub fn ack_monitor_event(&self, _event_id: u64) { - // TODO: once events have ids, remove the corresponding event here + pub fn ack_monitor_event(&self, event_id: u64) { + let inner = &mut *self.inner.lock().unwrap(); + inner.ack_monitor_event(event_id); + } + + /// Enables persistent monitor events mode. When enabled, monitor events are retained until + /// explicitly acked rather than cleared on read. + pub(crate) fn set_persistent_events_enabled(&self, enabled: bool) { + self.inner.lock().unwrap().persistent_events_enabled = enabled; } /// Copies [`MonitorEvent`] state from `other` into `self`. @@ -2212,11 +2230,16 @@ impl ChannelMonitor { /// serialization round-trip. 
#[cfg(any(test, feature = "_test_utils"))] pub fn copy_monitor_event_state(&self, other: &ChannelMonitor) { - let (pending, next_id) = { + let (provided, pending, next_id) = { let other_inner = other.inner.lock().unwrap(); - (other_inner.pending_monitor_events.clone(), other_inner.next_monitor_event_id) + ( + other_inner.provided_monitor_events.clone(), + other_inner.pending_monitor_events.clone(), + other_inner.next_monitor_event_id, + ) }; let mut self_inner = self.inner.lock().unwrap(); + self_inner.provided_monitor_events = provided; self_inner.pending_monitor_events = pending; self_inner.next_monitor_event_id = next_id; } @@ -4619,10 +4642,23 @@ impl ChannelMonitorImpl { ); } + fn ack_monitor_event(&mut self, event_id: u64) { + self.provided_monitor_events.retain(|(id, _)| *id != event_id); + // If this event was generated prior to a restart, it may be in this queue instead + self.pending_monitor_events.retain(|(id, _)| *id != event_id); + } + fn get_and_clear_pending_monitor_events(&mut self) -> Vec<(u64, MonitorEvent)> { - let mut ret = Vec::new(); - mem::swap(&mut ret, &mut self.pending_monitor_events); - ret + if self.persistent_events_enabled { + let mut ret = Vec::new(); + mem::swap(&mut ret, &mut self.pending_monitor_events); + self.provided_monitor_events.extend(ret.iter().cloned()); + ret + } else { + let mut ret = Vec::new(); + mem::swap(&mut ret, &mut self.pending_monitor_events); + ret + } } /// Gets the set of events that are repeated regularly (e.g. 
those which RBF bump @@ -5946,8 +5982,8 @@ impl ChannelMonitorImpl { if inbound_htlc_expiry > max_expiry_height { continue; } - let duplicate_event = self.pending_monitor_events.iter().any( - |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { + let duplicate_event = self.pending_monitor_events.iter().chain(self.provided_monitor_events.iter()) + .any(|(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == *source } else { false }); if duplicate_event { @@ -6363,7 +6399,7 @@ impl ChannelMonitorImpl { // HTLC resolution backwards to and figure out whether we learned a preimage from it. if let Some((source, payment_hash, amount_msat)) = payment_data { if accepted_preimage_claim { - if !self.pending_monitor_events.iter().any( + if !self.pending_monitor_events.iter().chain(self.provided_monitor_events.iter()).any( |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == source } else { false }) { self.onchain_events_awaiting_threshold_conf.push(OnchainEventEntry { txid: tx.compute_txid(), @@ -6385,7 +6421,7 @@ impl ChannelMonitorImpl { }), &mut self.next_monitor_event_id); } } else if offered_preimage_claim { - if !self.pending_monitor_events.iter().any( + if !self.pending_monitor_events.iter().chain(self.provided_monitor_events.iter()).any( |(_, update)| if let &MonitorEvent::HTLCEvent(ref upd) = update { upd.source == source } else { false }) { @@ -6974,6 +7010,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP payment_preimages, pending_monitor_events, + provided_monitor_events: Vec::new(), persistent_events_enabled: persistent_events_enabled.is_some(), next_monitor_event_id, pending_events, diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index fb13cc27716..bf01c95a203 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -3681,6 +3681,8 @@ impl< our_network_pubkey, current_timestamp, 
expanded_inbound_key, node_signer.get_receive_auth_key(), secp_ctx.clone(), message_router, logger.clone(), ); + #[cfg(test)] + let override_persistent_monitor_events = config.override_persistent_monitor_events; ChannelManager { config: RwLock::new(config), @@ -3737,7 +3739,27 @@ impl< logger, - persistent_monitor_events: false, + persistent_monitor_events: { + #[cfg(not(test))] + { false } + #[cfg(test)] + { + override_persistent_monitor_events.unwrap_or_else(|| { + use core::hash::{BuildHasher, Hasher}; + match std::env::var("LDK_TEST_PERSISTENT_MON_EVENTS") { + Ok(val) => match val.as_str() { + "1" => true, + "0" => false, + _ => panic!("LDK_TEST_PERSISTENT_MON_EVENTS must be 0 or 1, got: {}", val), + }, + Err(_) => { + let rand_val = std::collections::hash_map::RandomState::new().build_hasher().finish(); + rand_val % 2 == 0 + }, + } + }) + } + }, #[cfg(feature = "_test_utils")] testing_dnssec_proof_offer_resolution_override: Mutex::new(new_hash_map()), @@ -11752,6 +11774,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ fail_chan!("Already had channel with the new channel_id"); }, hash_map::Entry::Vacant(e) => { + monitor.set_persistent_events_enabled(self.persistent_monitor_events); let monitor_res = self.chain_monitor.watch_channel(monitor.channel_id(), monitor); if let Ok(persist_state) = monitor_res { // There's no problem signing a counterparty's funding transaction if our monitor @@ -11922,6 +11945,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ match chan .funding_signed(&msg, best_block, &self.signer_provider, &self.logger) .and_then(|(funded_chan, monitor)| { + monitor.set_persistent_events_enabled(self.persistent_monitor_events); self.chain_monitor .watch_channel(funded_chan.context.channel_id(), monitor) .map_err(|()| { @@ -12837,6 +12861,7 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ if let Some(chan) = chan.as_funded_mut() { if let Some(monitor) = monitor_opt { + monitor.set_persistent_events_enabled(self.persistent_monitor_events); let monitor_res = self.chain_monitor.watch_channel(monitor.channel_id(), monitor); if let Ok(persist_state) = monitor_res { diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 1a68e5165a8..4c97df2f42e 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3592,6 +3592,9 @@ fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: b let mut cfg = test_default_channel_config(); cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; cfg.channel_handshake_config.negotiate_anchor_zero_fee_commitments = p2a_anchor; + // This test specifically tests lost monitor events, which requires the legacy + // (non-persistent) monitor event behavior. + cfg.override_persistent_monitor_events = Some(false); let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; let chanmon_cfgs = create_chanmon_cfgs(3); diff --git a/lightning/src/util/config.rs b/lightning/src/util/config.rs index 14c507184ac..b3be0896f8f 100644 --- a/lightning/src/util/config.rs +++ b/lightning/src/util/config.rs @@ -1103,6 +1103,10 @@ pub struct UserConfig { /// /// [`ChannelManager::splice_channel`]: crate::ln::channelmanager::ChannelManager::splice_channel pub reject_inbound_splices: bool, + /// If set to `Some`, overrides the random selection of whether to use persistent monitor + /// events. Only available in tests. 
+ #[cfg(test)] + pub override_persistent_monitor_events: Option, } impl Default for UserConfig { @@ -1119,6 +1123,8 @@ impl Default for UserConfig { enable_htlc_hold: false, hold_outbound_htlcs_at_next_hop: false, reject_inbound_splices: true, + #[cfg(test)] + override_persistent_monitor_events: None, } } } From 381f2cb12b675c4bbded614ae62b62feeb9136b5 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 24 Mar 2026 13:05:25 -0400 Subject: [PATCH 21/28] Persist user_channel_id in monitors Will be used in upcoming commits when generating MonitorEvents. --- lightning/src/chain/channelmonitor.rs | 11 ++++++++++- lightning/src/ln/channel.rs | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 36c650f8daf..95e2eeda429 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -1211,6 +1211,9 @@ pub(crate) struct ChannelMonitorImpl { /// True if this channel was configured for manual funding broadcasts. Monitors written by /// versions prior to LDK 0.2 load with `false` until a new update persists it. is_manual_broadcast: bool, + /// The user-provided channel ID, used to populate events generated by the monitor. + /// Added in LDK 0.4. + user_channel_id: Option, /// True once we've observed either funding transaction on-chain. Older monitors prior to LDK 0.2 /// assume this is `true` when absent during upgrade so holder broadcasts aren't gated unexpectedly.
/// In manual-broadcast channels we also use this to trigger deferred holder @@ -1805,6 +1808,7 @@ pub(crate) fn write_chanmon_internal( (37, channel_monitor.funding_seen_onchain, required), (39, channel_monitor.best_block.previous_blocks, required), (41, channel_monitor.next_monitor_event_id, required), + (43, channel_monitor.user_channel_id, option), }); Ok(()) @@ -1909,7 +1913,7 @@ impl ChannelMonitor { commitment_transaction_number_obscure_factor: u64, initial_holder_commitment_tx: HolderCommitmentTransaction, best_block: BestBlock, counterparty_node_id: PublicKey, channel_id: ChannelId, - is_manual_broadcast: bool, + is_manual_broadcast: bool, user_channel_id: u128, ) -> ChannelMonitor { assert!(commitment_transaction_number_obscure_factor <= (1 << 48)); @@ -1958,6 +1962,7 @@ impl ChannelMonitor { pending_funding: vec![], is_manual_broadcast, + user_channel_id: Some(user_channel_id), funding_seen_onchain: false, latest_update_id: 0, @@ -6806,6 +6811,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP let mut is_manual_broadcast = RequiredWrapper(None); let mut funding_seen_onchain = RequiredWrapper(None); let mut best_block_previous_blocks = None; + let mut user_channel_id: Option = None; let mut persistent_events_enabled: Option<()> = None; let mut next_monitor_event_id: Option = None; let mut pending_mon_evs_with_ids: Option> = None; @@ -6835,6 +6841,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP (37, funding_seen_onchain, (default_value, true)), (39, best_block_previous_blocks, option), // Added and always set in 0.3 (41, next_monitor_event_id, option), + (43, user_channel_id, option), }); if let Some(previous_blocks) = best_block_previous_blocks { best_block.previous_blocks = previous_blocks; @@ -6978,6 +6985,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP }, pending_funding: pending_funding.unwrap_or(vec![]), is_manual_broadcast: 
is_manual_broadcast.0.unwrap(), + user_channel_id, // Older monitors prior to LDK 0.2 assume this is `true` when absent // during upgrade so holder broadcasts aren't gated unexpectedly. funding_seen_onchain: funding_seen_onchain.0.unwrap(), @@ -7139,6 +7147,7 @@ pub(super) fn dummy_monitor( dummy_key, channel_id, false, + 0, ) } diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 4839424d38c..42ae36b0fc3 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -3631,7 +3631,7 @@ trait InitialRemoteCommitmentReceiver { funding.get_holder_selected_contest_delay(), &context.destination_script, &funding.channel_transaction_parameters, funding.is_outbound(), obscure_factor, holder_commitment_tx, best_block, context.counterparty_node_id, context.channel_id(), - context.is_manual_broadcast, + context.is_manual_broadcast, context.get_user_id(), ); channel_monitor.provide_initial_counterparty_commitment_tx( counterparty_initial_commitment_tx.clone(), From b7e165e37294c7e6924d65ba903640abd9c04310 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 24 Mar 2026 14:21:11 -0400 Subject: [PATCH 22/28] Add skimmed fee to monitor updates XXX --- lightning/src/chain/channelmonitor.rs | 79 ++++++++++++++++++++++++--- lightning/src/ln/channel.rs | 8 ++- lightning/src/util/ser.rs | 1 + 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 95e2eeda429..2719ed7d03d 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -639,6 +639,21 @@ impl_writeable_tlv_based_enum_upgradable!(OnchainEvent, }, ); +/// Information about an HTLC forward that was claimed via preimage on the outbound edge, included +/// in [`ChannelMonitorUpdateStep`] so the monitor can generate events with the full claim context. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct OutboundHTLCClaim { + pub htlc_id: SentHTLCId, + pub preimage: PaymentPreimage, + pub skimmed_fee_msat: Option, +} + +impl_writeable_tlv_based!(OutboundHTLCClaim, { + (0, htlc_id, required), + (2, preimage, required), + (4, skimmed_fee_msat, option), +}); + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum ChannelMonitorUpdateStep { LatestHolderCommitmentTXInfo { @@ -650,13 +665,13 @@ pub(crate) enum ChannelMonitorUpdateStep { /// `htlc_outputs` will only include dust HTLCs. We still have to track the /// `Option` for backwards compatibility. htlc_outputs: Vec<(HTLCOutputInCommitment, Option, Option)>, - claimed_htlcs: Vec<(SentHTLCId, PaymentPreimage)>, + claimed_htlcs: Vec, nondust_htlc_sources: Vec, }, LatestHolderCommitment { commitment_txs: Vec, htlc_data: CommitmentHTLCData, - claimed_htlcs: Vec<(SentHTLCId, PaymentPreimage)>, + claimed_htlcs: Vec, }, LatestCounterpartyCommitmentTXInfo { commitment_txid: Txid, @@ -737,9 +752,32 @@ impl ChannelMonitorUpdateStep { impl_writeable_tlv_based_enum_upgradable!(ChannelMonitorUpdateStep, (0, LatestHolderCommitmentTXInfo) => { (0, commitment_tx, required), - (1, claimed_htlcs, optional_vec), + (1, _legacy_claimed_htlcs, (legacy, Vec<(SentHTLCId, PaymentPreimage)>, + |v: Option<&Vec<(SentHTLCId, PaymentPreimage)>>| { + // Only populate from legacy if the new-format tag 5 wasn't read. + if let Some(legacy) = v { + if claimed_htlcs.as_ref().map_or(true, |v| v.is_empty()) { + claimed_htlcs = Some(legacy.iter().map(|(htlc_id, preimage)| OutboundHTLCClaim { + htlc_id: *htlc_id, preimage: *preimage, skimmed_fee_msat: None, + }).collect()); + } + } + Ok(()) + }, + |us: &ChannelMonitorUpdateStep| match us { + ChannelMonitorUpdateStep::LatestHolderCommitmentTXInfo { claimed_htlcs, .. 
} => { + if !claimed_htlcs.is_empty() { + let legacy: Vec<(SentHTLCId, PaymentPreimage)> = claimed_htlcs.iter().map(|claim| (claim.htlc_id, claim.preimage)).collect(); + Some(legacy) + } else { + None + } + } + _ => None + })), (2, htlc_outputs, required_vec), (4, nondust_htlc_sources, optional_vec), + (5, claimed_htlcs, optional_vec), }, (1, LatestCounterpartyCommitmentTXInfo) => { (0, commitment_txid, required), @@ -774,7 +812,30 @@ impl_writeable_tlv_based_enum_upgradable!(ChannelMonitorUpdateStep, (8, LatestHolderCommitment) => { (1, commitment_txs, required_vec), (3, htlc_data, required), - (5, claimed_htlcs, required_vec), + (5, _legacy_claimed_htlcs, (legacy, Vec<(SentHTLCId, PaymentPreimage)>, + |v: Option<&Vec<(SentHTLCId, PaymentPreimage)>>| { + // Only populate from legacy if the new-format tag 7 wasn't read. + if let Some(legacy) = v { + if claimed_htlcs.as_ref().map_or(true, |v| v.is_empty()) { + claimed_htlcs = Some(legacy.iter().map(|(htlc_id, preimage)| OutboundHTLCClaim { + htlc_id: *htlc_id, preimage: *preimage, skimmed_fee_msat: None, + }).collect()); + } + } + Ok(()) + }, + |us: &ChannelMonitorUpdateStep| match us { + ChannelMonitorUpdateStep::LatestHolderCommitment { claimed_htlcs, .. 
} => { + if !claimed_htlcs.is_empty() { + let legacy: Vec<(SentHTLCId, PaymentPreimage)> = claimed_htlcs.iter().map(|claim| (claim.htlc_id, claim.preimage)).collect(); + Some(legacy) + } else { + None + } + } + _ => None + })), + (7, claimed_htlcs, optional_vec), }, (10, RenegotiatedFunding) => { (1, channel_parameters, (required: ReadableArgs, None)), @@ -3681,7 +3742,7 @@ impl ChannelMonitorImpl { fn provide_latest_holder_commitment_tx( &mut self, holder_commitment_tx: HolderCommitmentTransaction, htlc_outputs: &[(HTLCOutputInCommitment, Option, Option)], - claimed_htlcs: &[(SentHTLCId, PaymentPreimage)], mut nondust_htlc_sources: Vec, + claimed_htlcs: &[OutboundHTLCClaim], mut nondust_htlc_sources: Vec, ) -> Result<(), &'static str> { let dust_htlcs = if htlc_outputs.iter().any(|(_, s, _)| s.is_some()) { // If we have non-dust HTLCs in htlc_outputs, ensure they match the HTLCs in the @@ -3802,7 +3863,7 @@ impl ChannelMonitorImpl { fn update_holder_commitment_data( &mut self, commitment_txs: Vec, - mut htlc_data: CommitmentHTLCData, claimed_htlcs: &[(SentHTLCId, PaymentPreimage)], + mut htlc_data: CommitmentHTLCData, claimed_htlcs: &[OutboundHTLCClaim], ) -> Result<(), &'static str> { self.verify_matching_commitment_transactions( commitment_txs.iter().map(|holder_commitment_tx| holder_commitment_tx.deref()), @@ -3822,7 +3883,7 @@ impl ChannelMonitorImpl { mem::swap(&mut htlc_data, &mut self.current_holder_htlc_data); self.prev_holder_htlc_data = Some(htlc_data); - for (claimed_htlc_id, claimed_preimage) in claimed_htlcs { + for claim in claimed_htlcs { #[cfg(debug_assertions)] { let cur_counterparty_htlcs = self @@ -3832,13 +3893,13 @@ impl ChannelMonitorImpl { .unwrap(); assert!(cur_counterparty_htlcs.iter().any(|(_, source_opt)| { if let Some(source) = source_opt { - SentHTLCId::from_source(source) == *claimed_htlc_id + SentHTLCId::from_source(source) == claim.htlc_id } else { false } })); } - self.counterparty_fulfilled_htlcs.insert(*claimed_htlc_id, 
*claimed_preimage); + self.counterparty_fulfilled_htlcs.insert(claim.htlc_id, claim.preimage); } Ok(()) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 42ae36b0fc3..a25ce3fe095 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -33,7 +33,7 @@ use crate::chain::chaininterface::{ use crate::chain::chainmonitor::MonitorEventSource; use crate::chain::channelmonitor::{ ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateStep, CommitmentHTLCData, - LATENCY_GRACE_PERIOD_BLOCKS, + OutboundHTLCClaim, LATENCY_GRACE_PERIOD_BLOCKS, }; use crate::chain::transaction::{OutPoint, TransactionData}; use crate::chain::BestBlock; @@ -8589,7 +8589,11 @@ where // claims, but such an upgrade is unlikely and including claimed HTLCs here // fixes a bug which the user was exposed to on 0.0.104 when they started the // claim anyway. - claimed_htlcs.push((SentHTLCId::from_source(&htlc.source), preimage)); + claimed_htlcs.push(OutboundHTLCClaim { + htlc_id: SentHTLCId::from_source(&htlc.source), + preimage, + skimmed_fee_msat: htlc.skimmed_fee_msat, + }); } htlc.state = OutboundHTLCState::AwaitingRemoteRevokeToRemove(reason); need_commitment = true; diff --git a/lightning/src/util/ser.rs b/lightning/src/util/ser.rs index 2b02629d3b0..4b00615bbf5 100644 --- a/lightning/src/util/ser.rs +++ b/lightning/src/util/ser.rs @@ -1112,6 +1112,7 @@ impl_writeable_for_vec_with_element_length_prefix!(&crate::ln::msgs::UpdateAddHT impl_for_vec!(u32); impl_for_vec!(crate::events::HTLCLocator); impl_for_vec!(crate::ln::types::ChannelId); +impl_for_vec!(crate::chain::channelmonitor::OutboundHTLCClaim); impl Writeable for Vec { #[inline] From 9fba5f19608c6fdec3fbf03a00013385f249c402 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 24 Mar 2026 14:48:56 -0400 Subject: [PATCH 23/28] Include skimmed fee and user chan id in monitor event XXX --- lightning/src/chain/channelmonitor.rs | 14 ++++++++++++++ lightning/src/ln/channelmanager.rs | 4 ++-- 
2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 2719ed7d03d..cbafa2b4cc1 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -261,12 +261,16 @@ pub struct HTLCUpdate { pub(crate) payment_preimage: Option, pub(crate) source: HTLCSource, pub(crate) htlc_value_satoshis: Option, + pub(crate) skimmed_fee_msat: Option, + pub(crate) next_user_channel_id: Option, } impl_writeable_tlv_based!(HTLCUpdate, { (0, payment_hash, required), (1, htlc_value_satoshis, option), (2, source, required), (4, payment_preimage, option), + (5, skimmed_fee_msat, option), + (7, next_user_channel_id, option), }); /// If an output goes from claimable only by us to claimable by us or our counterparty within this @@ -4633,6 +4637,8 @@ impl ChannelMonitorImpl { payment_preimage: None, source: source.clone(), htlc_value_satoshis, + skimmed_fee_msat: None, + next_user_channel_id: None, }), &mut self.next_monitor_event_id, ); @@ -5957,6 +5963,8 @@ impl ChannelMonitorImpl { payment_preimage: None, source, htlc_value_satoshis, + skimmed_fee_msat: None, + next_user_channel_id: None, })); self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC { commitment_tx_output_idx, @@ -6067,6 +6075,8 @@ impl ChannelMonitorImpl { payment_preimage: None, payment_hash: htlc.payment_hash, htlc_value_satoshis: Some(htlc.amount_msat / 1000), + skimmed_fee_msat: None, + next_user_channel_id: None, }), &mut self.next_monitor_event_id); } } @@ -6484,6 +6494,8 @@ impl ChannelMonitorImpl { payment_preimage: Some(payment_preimage), payment_hash, htlc_value_satoshis: Some(amount_msat / 1000), + skimmed_fee_msat: None, + next_user_channel_id: None, }), &mut self.next_monitor_event_id); } } else if offered_preimage_claim { @@ -6508,6 +6520,8 @@ impl ChannelMonitorImpl { payment_preimage: Some(payment_preimage), payment_hash, htlc_value_satoshis: Some(amount_msat / 1000), + 
skimmed_fee_msat: None, + next_user_channel_id: None, }), &mut self.next_monitor_event_id); } } else { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index bf01c95a203..4c7d2db7134 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13718,12 +13718,12 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), - None, + htlc_update.skimmed_fee_msat, true, counterparty_node_id, funding_outpoint, channel_id, - None, + htlc_update.next_user_channel_id, None, None, Some(event_id), From c5ea5692dbb03c2a274228ed52dcd9b031f0a726 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 30 Mar 2026 11:05:53 -0400 Subject: [PATCH 24/28] Track recent monitor updates in TestChainMonitor And log them in check_added_monitors if it fails. --- lightning/src/ln/functional_test_utils.rs | 15 ++++++++++++++- lightning/src/util/test_utils.rs | 9 +++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index 84cdf785da5..5ef00b8aa37 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -1274,7 +1274,20 @@ pub fn check_added_monitors>(node: & if let Some(chain_monitor) = node.chain_monitor() { let mut added_monitors = chain_monitor.added_monitors.lock().unwrap(); let n = added_monitors.len(); - assert_eq!(n, count, "expected {} monitors to be added, not {}", count, n); + if n != count { + let recent = chain_monitor.recent_monitor_updates.lock().unwrap(); + let mut desc = String::new(); + for (i, (chan_id, update)) in recent.iter().take(n).enumerate() { + desc += &format!( + "\n [{}] chan={} update_id={} steps={:?}", + i, chan_id, update.update_id, update.updates + ); + } + panic!( + "expected {} monitors to be added, not {}. 
Last {} updates (most recent first):{}", + count, n, n, desc + ); + } added_monitors.clear(); } } diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs index 0961f31cfe6..f4eb6287921 100644 --- a/lightning/src/util/test_utils.rs +++ b/lightning/src/util/test_utils.rs @@ -521,6 +521,8 @@ pub struct TestChainMonitor<'a> { /// deferred operations. This allows tests to control exactly when queued monitor updates /// are applied to the in-memory monitor. pub pause_flush: AtomicBool, + /// Buffer of the last 20 monitor updates, most recent first. + pub recent_monitor_updates: Mutex>, } impl<'a> TestChainMonitor<'a> { pub fn new( @@ -581,6 +583,7 @@ impl<'a> TestChainMonitor<'a> { #[cfg(feature = "std")] write_blocker: Mutex::new(None), pause_flush: AtomicBool::new(false), + recent_monitor_updates: Mutex::new(Vec::new()), } } @@ -679,6 +682,12 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { .or_insert(Vec::new()) .push(update.clone()); + { + let mut recent = self.recent_monitor_updates.lock().unwrap(); + recent.insert(0, (channel_id, update.clone())); + recent.truncate(20); + } + if let Some(exp) = self.expect_channel_force_closed.lock().unwrap().take() { assert_eq!(channel_id, exp.0); assert_eq!(update.updates.len(), 1); From fe3e82b60a346391832416ba1fa3cdba3a26e1a0 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 30 Mar 2026 12:27:11 -0400 Subject: [PATCH 25/28] Prepare tests for deprecating RAA-blockers And tweak a few test utils to have clearer error messages. Splitting up tests with may variants makes it easier to debug tests. Tests marked "legacy" have test coverage replicated in the next commit, that does not rely on RAA blocking actions. 
--- lightning/src/chain/channelmonitor.rs | 4 +- lightning/src/ln/chanmon_update_fail_tests.rs | 39 +++++++++++++------ lightning/src/ln/functional_test_utils.rs | 5 ++- lightning/src/ln/functional_tests.rs | 11 +++++- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index cbafa2b4cc1..a8da7054804 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -193,7 +193,7 @@ fn push_monitor_event( } /// An event to be processed by the ChannelManager. -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, PartialEq, Eq, Debug)] pub enum MonitorEvent { /// A monitor event containing an HTLCUpdate. HTLCEvent(HTLCUpdate), @@ -255,7 +255,7 @@ impl_writeable_tlv_based_enum_upgradable_legacy!(MonitorEvent, /// Simple structure sent back by `chain::Watch` when an HTLC from a forward channel is detected on /// chain. Used to update the corresponding HTLC in the backward channel. Failing to pass the /// preimage claim backward will lead to loss of funds. 
-#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, PartialEq, Eq, Debug)] pub struct HTLCUpdate { pub(crate) payment_hash: PaymentHash, pub(crate) payment_preimage: Option, diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 796b66830d1..4a9f371d459 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -3774,7 +3774,7 @@ fn test_inverted_mon_completion_order() { do_test_inverted_mon_completion_order(false, false); } -fn do_test_durable_preimages_on_closed_channel( +fn do_test_durable_preimages_on_closed_channel_legacy( close_chans_before_reload: bool, close_only_a: bool, hold_post_reload_mon_update: bool, ) { // Test that we can apply a `ChannelMonitorUpdate` with a payment preimage even if the channel @@ -4014,16 +4014,31 @@ fn do_test_durable_preimages_on_closed_channel( } #[test] -fn test_durable_preimages_on_closed_channel() { - do_test_durable_preimages_on_closed_channel(true, true, true); - do_test_durable_preimages_on_closed_channel(true, true, false); - do_test_durable_preimages_on_closed_channel(true, false, true); - do_test_durable_preimages_on_closed_channel(true, false, false); - do_test_durable_preimages_on_closed_channel(false, false, true); - do_test_durable_preimages_on_closed_channel(false, false, false); +fn test_durable_preimages_on_closed_channel_legacy_a() { + do_test_durable_preimages_on_closed_channel_legacy(true, true, true); +} +#[test] +fn test_durable_preimages_on_closed_channel_legacy_b() { + do_test_durable_preimages_on_closed_channel_legacy(true, true, false); +} +#[test] +fn test_durable_preimages_on_closed_channel_legacy_c() { + do_test_durable_preimages_on_closed_channel_legacy(true, false, true); +} +#[test] +fn test_durable_preimages_on_closed_channel_legacy_d() { + do_test_durable_preimages_on_closed_channel_legacy(true, false, false); +} +#[test] +fn test_durable_preimages_on_closed_channel_legacy_e() { + 
do_test_durable_preimages_on_closed_channel_legacy(false, false, true); +} +#[test] +fn test_durable_preimages_on_closed_channel_legacy_f() { + do_test_durable_preimages_on_closed_channel_legacy(false, false, false); } -fn do_test_reload_mon_update_completion_actions(close_during_reload: bool) { +fn do_test_reload_mon_update_completion_actions_legacy(close_during_reload: bool) { // Test that if a `ChannelMonitorUpdate` completes but a `ChannelManager` isn't serialized // before restart we run the monitor update completion action on startup. let chanmon_cfgs = create_chanmon_cfgs(3); @@ -4139,9 +4154,9 @@ fn do_test_reload_mon_update_completion_actions(close_during_reload: bool) { } #[test] -fn test_reload_mon_update_completion_actions() { - do_test_reload_mon_update_completion_actions(true); - do_test_reload_mon_update_completion_actions(false); +fn test_reload_mon_update_completion_actions_legacy() { + do_test_reload_mon_update_completion_actions_legacy(true); + do_test_reload_mon_update_completion_actions_legacy(false); } fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index 5ef00b8aa37..d768d772124 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -2620,7 +2620,7 @@ macro_rules! 
expect_htlc_handling_failed_destinations { assert!($expected_failures.contains(&failure_type)); num_expected_failures -= 1; }, - _ => panic!("Unexpected destination"), + e => panic!("Expected HTLCHandlingFailed, got {e:?}"), } } assert_eq!(num_expected_failures, 0); @@ -2644,7 +2644,8 @@ pub fn expect_and_process_pending_htlcs_and_htlc_handling_failed( let events = node.node.get_and_clear_pending_events(); expect_htlc_failure_conditions(events, expected_failures); expect_and_process_pending_htlcs(node, false); - assert!(node.node.get_and_clear_pending_events().is_empty()); + let events = node.node.get_and_clear_pending_events(); + assert!(events.is_empty(), "Unexpected events: {events:?}"); } pub fn expect_and_process_pending_htlcs(node: &Node<'_, '_, '_>, process_twice: bool) { diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 7ed46922d8a..81fb99819d3 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -7964,10 +7964,19 @@ fn do_test_onchain_htlc_settlement_after_close( } #[xtest(feature = "_externalize_tests")] -pub fn test_onchain_htlc_settlement_after_close() { +pub fn test_onchain_htlc_settlement_after_close_a() { do_test_onchain_htlc_settlement_after_close(true, true); +} +#[xtest(feature = "_externalize_tests")] +pub fn test_onchain_htlc_settlement_after_close_b() { do_test_onchain_htlc_settlement_after_close(false, true); // Technically redundant, but may as well +} +#[xtest(feature = "_externalize_tests")] +pub fn test_onchain_htlc_settlement_after_close_c() { do_test_onchain_htlc_settlement_after_close(true, false); +} +#[xtest(feature = "_externalize_tests")] +pub fn test_onchain_htlc_settlement_after_close_d() { do_test_onchain_htlc_settlement_after_close(false, false); } From b6b4426d8606ae06d90d3bd25f0f1be16038a285 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 30 Mar 2026 12:46:07 -0400 Subject: [PATCH 26/28] Add 
EventCompletionAction::AckMonitorEvent When a payment's resolution is communicated to the downstream logic via Payment{Sent,Failed}, we may want to ack the monitor event that tells us about the payment's resolution in the ChannelMonitor to prevent the monitor event from being re-provided to us on startup. Used in upcoming commits. --- lightning/src/ln/channelmanager.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 4c7d2db7134..e2e70eef8d9 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1613,7 +1613,14 @@ pub(crate) enum EventCompletionAction { /// fully-resolved in the [`ChannelMonitor`], which we do via this action. /// Note that this action will be dropped on downgrade to LDK prior to 0.2! ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate), + + /// When a payment's resolution is communicated to the downstream logic via + /// [`Event::PaymentSent`] or [`Event::PaymentFailed`], we may want to ack the [`MonitorEvent`] + /// that tells us about the payment's resolution in the [`ChannelMonitor`] to prevent the monitor + /// event from being re-provided to us on startup. 
+ AckMonitorEvent { event_source: MonitorEventSource }, } + impl_writeable_tlv_based_enum!(EventCompletionAction, (0, ReleaseRAAChannelMonitorUpdate) => { (0, channel_funding_outpoint, option), @@ -1624,6 +1631,9 @@ impl_writeable_tlv_based_enum!(EventCompletionAction, } ChannelId::v1_from_funding_outpoint(channel_funding_outpoint.unwrap()) })), + }, + (3, AckMonitorEvent) => { + (1, event_source, required), } {1, ReleasePaymentCompleteChannelMonitorUpdate} => (), ); @@ -15590,6 +15600,9 @@ impl< } } }, + EventCompletionAction::AckMonitorEvent { event_source } => { + self.chain_monitor.ack_monitor_event(event_source); + }, } } } From e6a0742095d00f7e1fda4b6eebe0434c81d12507 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 30 Mar 2026 12:50:05 -0400 Subject: [PATCH 27/28] Persistent HTLC forward claim monitor events Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. 
Here we add MonitorEvent resolution of forwarded HTLC claims, which allows us to remove usage of RAA monitor update blocking actions and a significant amount of complex startup reconstruction code. --- lightning/src/chain/channelmonitor.rs | 48 +- lightning/src/ln/chanmon_update_fail_tests.rs | 489 +++++++++++++++++- lightning/src/ln/channelmanager.rs | 184 +++++-- lightning/src/ln/functional_tests.rs | 22 + lightning/src/ln/monitor_tests.rs | 2 + lightning/src/ln/payment_tests.rs | 72 ++- lightning/src/util/test_utils.rs | 10 +- 7 files changed, 772 insertions(+), 55 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index a8da7054804..b44f8ee5853 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2289,6 +2289,18 @@ impl ChannelMonitor { inner.ack_monitor_event(event_id); } + /// Returns true if this monitor has any pending or provided `HTLCEvent`s containing a + /// payment preimage, indicating a claim is in progress. + #[cfg(any(test, feature = "_test_utils"))] + pub fn has_pending_claim_monitor_events(&self) -> bool { + let inner = self.inner.lock().unwrap(); + inner.pending_monitor_events.iter().chain(inner.provided_monitor_events.iter()).any( + |(_, ev)| { + matches!(ev, MonitorEvent::HTLCEvent(HTLCUpdate { payment_preimage: Some(_), .. })) + }, + ) + } + /// Enables persistent monitor events mode. When enabled, monitor events are retained until /// explicitly acked rather than cleared on read. 
pub(crate) fn set_persistent_events_enabled(&self, enabled: bool) { @@ -3888,20 +3900,32 @@ impl ChannelMonitorImpl { self.prev_holder_htlc_data = Some(htlc_data); for claim in claimed_htlcs { - #[cfg(debug_assertions)] - { - let cur_counterparty_htlcs = self - .funding - .counterparty_claimable_outpoints - .get(&self.funding.current_counterparty_commitment_txid.unwrap()) - .unwrap(); - assert!(cur_counterparty_htlcs.iter().any(|(_, source_opt)| { + let htlc_opt = self + .funding + .counterparty_claimable_outpoints + .get(&self.funding.current_counterparty_commitment_txid.unwrap()) + .unwrap() + .iter() + .find_map(|(htlc, source_opt)| { if let Some(source) = source_opt { - SentHTLCId::from_source(source) == claim.htlc_id - } else { - false + if SentHTLCId::from_source(source) == claim.htlc_id { + return Some((htlc, source)); + } } - })); + None + }); + debug_assert!(htlc_opt.is_some()); + if self.persistent_events_enabled { + if let Some((htlc, source)) = htlc_opt { + self.push_monitor_event(MonitorEvent::HTLCEvent(HTLCUpdate { + payment_hash: htlc.payment_hash, + payment_preimage: Some(claim.preimage), + source: *source.clone(), + htlc_value_satoshis: Some(htlc.amount_msat), + skimmed_fee_msat: claim.skimmed_fee_msat, + next_user_channel_id: self.user_channel_id, + })); + } } self.counterparty_fulfilled_htlcs.insert(claim.htlc_id, claim.preimage); } diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 4a9f371d459..59a711babbf 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -95,6 +95,9 @@ fn test_monitor_and_persister_update_fail() { (nodes[0].keys_manager, nodes[0].keys_manager), ) .unwrap(); + // The deserialized monitor will reset the monitor event state, so copy it + // from the live monitor before comparing. 
+ new_monitor.copy_monitor_event_state(&monitor); assert!(new_monitor == *monitor); new_monitor }; @@ -3601,7 +3604,12 @@ fn do_test_inverted_mon_completion_order( let chain_mon; let node_b_reload; - let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]); + // When persistent monitor events is supported, there is no need to block the downstream RAA + // monitor update until the inbound edge gets the preimage. + let mut cfg = test_default_channel_config(); + cfg.override_persistent_monitor_events = Some(false); + let node_chanmgrs = + create_node_chanmgrs(3, &node_cfgs, &[Some(cfg.clone()), Some(cfg.clone()), Some(cfg)]); let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); let node_a_id = nodes[0].node.get_our_node_id(); @@ -3793,7 +3801,10 @@ fn do_test_durable_preimages_on_closed_channel_legacy( let chain_mon; let node_b_reload; - let legacy_cfg = test_legacy_channel_config(); + let mut legacy_cfg = test_legacy_channel_config(); + // This test relies on legacy RAA monitor update blocking behavior, which isn't in place if + // persistent_monitor_events is set. + legacy_cfg.override_persistent_monitor_events = Some(false); let node_chanmgrs = create_node_chanmgrs( 3, &node_cfgs, @@ -4038,6 +4049,223 @@ fn test_durable_preimages_on_closed_channel_legacy_f() { do_test_durable_preimages_on_closed_channel_legacy(false, false, false); } +fn do_test_durable_preimages_on_closed_channel( + close_chans_before_reload: bool, close_only_a: bool, +) { + // Test that we can apply a `ChannelMonitorUpdate` with a payment preimage even if the channel + // is force-closed between when we generate the update on reload and when we go to handle the + // update or prior to generating the update at all. 
+ + if !close_chans_before_reload && close_only_a { + // If we're not closing, it makes no sense to "only close A" + panic!(); + } + + let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + + let persister; + let chain_mon; + let node_b_reload; + + let mut legacy_cfg = test_legacy_channel_config(); + legacy_cfg.override_persistent_monitor_events = Some(true); + let node_chanmgrs = create_node_chanmgrs( + 3, + &node_cfgs, + &[Some(legacy_cfg.clone()), Some(legacy_cfg.clone()), Some(legacy_cfg.clone())], + ); + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + let node_a_id = nodes[0].node.get_our_node_id(); + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2; + let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2; + + // Route a payment from A, through B, to C, then claim it on C. Once we pass B the + // `update_fulfill_htlc` we have a monitor update for both of B's channels. We complete the one + // on the B<->C channel but leave the A<->B monitor update pending, then reload B. + let (payment_preimage, payment_hash, ..) = + route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000); + + let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode(); + + nodes[2].node.claim_funds(payment_preimage); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], payment_hash, 1_000_000); + + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + let mut cs_updates = get_htlc_update_msgs(&nodes[2], &node_b_id); + nodes[1].node.handle_update_fulfill_htlc(node_c_id, cs_updates.update_fulfill_htlcs.remove(0)); + + // B generates a new monitor update for the A <-> B channel, but doesn't send the new messages + // for it since the monitor update is marked in-progress. 
+ check_added_monitors(&nodes[1], 1); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + // Now step the Commitment Signed Dance between B and C forward a bit, ensuring we won't get + // the preimage when the nodes reconnect, at which point we have to ensure we get it from the + // ChannelMonitor. + nodes[1].node.handle_commitment_signed_batch_test(node_c_id, &cs_updates.commitment_signed); + check_added_monitors(&nodes[1], 1); + let _ = get_revoke_commit_msgs(&nodes[1], &node_c_id); + + let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode(); + + if close_chans_before_reload { + if !close_only_a { + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + let message = "Channel force-closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_id_bc, &node_c_id, message.clone()) + .unwrap(); + check_closed_broadcast(&nodes[1], 1, true); + let reason = + ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event(&nodes[1], 1, reason, &[node_c_id], 100000); + } + + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + let message = "Channel force-closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_id_ab, &node_a_id, message.clone()) + .unwrap(); + check_closed_broadcast(&nodes[1], 1, true); + let reason = + ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000); + } + + // Now reload node B + let manager_b = nodes[1].node.encode(); + reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, chain_mon, node_b_reload); + nodes[1].disable_monitor_completeness_assertion(); + nodes[1] + .chain_monitor + .deterministic_monitor_events + .store(true, core::sync::atomic::Ordering::Release); + + nodes[0].node.peer_disconnected(node_b_id); + nodes[2].node.peer_disconnected(node_b_id); + + if 
close_chans_before_reload { + // If the channels were already closed, B will rebroadcast its closing transactions here. + let bs_close_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + if close_only_a { + assert_eq!(bs_close_txn.len(), 2); + } else { + assert_eq!(bs_close_txn.len(), 3); + } + } + + let err_msg = "Channel force-closed".to_owned(); + let reason = ClosureReason::HolderForceClosed { + broadcasted_latest_txn: Some(true), + message: err_msg.clone(), + }; + nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &node_b_id, err_msg).unwrap(); + check_closed_event(&nodes[0], 1, reason, &[node_b_id], 100000); + check_added_monitors(&nodes[0], 1); + let as_closing_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(as_closing_tx.len(), 1); + + // In order to give A's closing transaction to B without processing background events first, + // use the _without_consistency_checks utility method. This is similar to connecting blocks + // during startup prior to the node being full initialized. + mine_transaction_without_consistency_checks(&nodes[1], &as_closing_tx[0]); + + // After a timer tick a payment preimage ChannelMonitorUpdate is applied to the A<->B + // ChannelMonitor (possible twice), even though the channel has since been closed. + check_added_monitors(&nodes[1], 0); + let mons_added = if close_chans_before_reload && !close_only_a { 4 } else { 3 }; + if !close_chans_before_reload { + check_closed_broadcast(&nodes[1], 1, false); + let evs = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(evs.len(), 3, "{:?}", evs); + assert!(evs.iter().all(|e| matches!( + e, + Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } + | Event::PaymentForwarded { .. 
} + ))); + check_added_monitors(&nodes[1], mons_added); + } + nodes[1].node.timer_tick_occurred(); + let expected_mons = if !close_chans_before_reload { 0 } else { mons_added }; + check_added_monitors(&nodes[1], expected_mons); + + // Finally, check that B created a payment preimage transaction and close out the payment. + let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_txn.len(), if close_chans_before_reload && !close_only_a { 2 } else { 1 }); + let bs_preimage_tx = bs_txn + .iter() + .find(|tx| tx.input[0].previous_output.txid == as_closing_tx[0].compute_txid()) + .unwrap(); + check_spends!(bs_preimage_tx, as_closing_tx[0]); + + mine_transactions(&nodes[0], &[&as_closing_tx[0], bs_preimage_tx]); + check_closed_broadcast(&nodes[0], 1, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); + + if close_chans_before_reload { + // For close_chans_before_reload, the deferred completions haven't been processed + // yet. Trigger process_pending_monitor_events now. + let _ = nodes[1].node.get_and_clear_pending_msg_events(); + // One of the monitor events was an HTLC update from the outbound edge containing the payment + // preimage, resulting in an inbound edge preimage update here. + check_added_monitors(&nodes[1], 1); + } + + if !close_chans_before_reload || close_only_a { + // Make sure the B<->C channel is still alive and well by sending a payment over it. + let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]); + reconnect_args.pending_responding_commitment_signed.1 = true; + reconnect_args.pending_raa.1 = true; + + reconnect_nodes(reconnect_args); + } + + // If the A<->B channel was closed before we reload, we'll replay the claim against it on + // reload, causing the `PaymentForwarded` event to get replayed. + let evs = nodes[1].node.get_and_clear_pending_events(); + if !close_chans_before_reload { + // PaymentForwarded already fired during get_and_clear_pending_events above. 
+ assert!(evs.is_empty(), "{:?}", evs); + } else { + assert_eq!(evs.len(), 3, "{:?}", evs); + for ev in evs { + if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev { + if !claim_from_onchain_tx { + assert!(next_htlcs[0].user_channel_id.is_some()) + } + } else { + panic!("Unexpected event: {:?}", ev); + } + } + } + + if !close_chans_before_reload || close_only_a { + assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); + send_payment(&nodes[1], &[&nodes[2]], 100_000); + } +} + +#[test] +fn test_durable_preimages_on_closed_channel_a() { + do_test_durable_preimages_on_closed_channel(true, true); +} +#[test] +fn test_durable_preimages_on_closed_channel_b() { + do_test_durable_preimages_on_closed_channel(true, false); +} +#[test] +fn test_durable_preimages_on_closed_channel_c() { + do_test_durable_preimages_on_closed_channel(false, false); +} + fn do_test_reload_mon_update_completion_actions_legacy(close_during_reload: bool) { // Test that if a `ChannelMonitorUpdate` completes but a `ChannelManager` isn't serialized // before restart we run the monitor update completion action on startup. @@ -4048,8 +4276,14 @@ fn do_test_reload_mon_update_completion_actions_legacy(close_during_reload: bool let chain_mon; let node_b_reload; - let legacy_cfg = test_legacy_channel_config(); - let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg), None, None]); + // This test relies on RAA blocking behavior which is bypassed with persistent monitor + // events. Force it off so the test exercises the intended code path. 
+ let mut legacy_cfg = test_legacy_channel_config(); + legacy_cfg.override_persistent_monitor_events = Some(false); + let mut cfg = test_default_channel_config(); + cfg.override_persistent_monitor_events = Some(false); + let node_chanmgrs = + create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg), Some(cfg.clone()), Some(cfg)]); let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); let node_b_id = nodes[1].node.get_our_node_id(); @@ -4159,6 +4393,167 @@ fn test_reload_mon_update_completion_actions_legacy() { do_test_reload_mon_update_completion_actions_legacy(false); } +#[test] +fn test_reload_mon_update_completion_actions() { + do_test_reload_mon_update_completion_actions(true); + do_test_reload_mon_update_completion_actions(false); +} + +fn do_test_reload_mon_update_completion_actions(close_during_reload: bool) { + // With persistent monitor events, the outbound monitor generates an HTLCEvent when the + // commitment dance includes claimed_htlcs. Test that when the inbound edge's preimage + // update is still in-flight (InProgress), the ChannelManager has the preimage update + // pending and the outbound monitor retains the preimage HTLCEvent. Then reload and verify + // the event is re-provided and properly acked after processing. 
+ let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + + let persister; + let chain_mon; + let node_b_reload; + + let mut cfg = test_legacy_channel_config(); + cfg.override_persistent_monitor_events = Some(true); + let node_chanmgrs = + create_node_chanmgrs(3, &node_cfgs, &[Some(cfg.clone()), Some(cfg.clone()), Some(cfg)]); + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + if !nodes[1].node.test_persistent_monitor_events_enabled() { + return; + } + + let node_a_id = nodes[0].node.get_our_node_id(); + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2; + let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2; + + // Route a payment A -> B -> C, then claim it on C. + let (payment_preimage, payment_hash, ..) = + route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000); + + nodes[2].node.claim_funds(payment_preimage); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], payment_hash, 1_000_000); + + // Hold the A<->B preimage monitor update as InProgress. + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + let mut cs_updates = get_htlc_update_msgs(&nodes[2], &node_b_id); + nodes[1].node.handle_update_fulfill_htlc(node_c_id, cs_updates.update_fulfill_htlcs.remove(0)); + check_added_monitors(&nodes[1], 1); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + // Complete the B<->C commitment signed dance. 
+ nodes[1].node.handle_commitment_signed_batch_test(node_c_id, &cs_updates.commitment_signed); + check_added_monitors(&nodes[1], 1); + let (bs_raa, bs_cs) = get_revoke_commit_msgs(&nodes[1], &node_c_id); + + nodes[2].node.handle_revoke_and_ack(node_b_id, &bs_raa); + check_added_monitors(&nodes[2], 1); + nodes[2].node.handle_commitment_signed_batch_test(node_b_id, &bs_cs); + check_added_monitors(&nodes[2], 1); + + // After handle_commitment_signed, the B<->C monitor has a preimage HTLCEvent from the + // LatestHolderCommitment update with claimed_htlcs. Check it's there before the RAA + // processes it. + assert!( + get_monitor!(nodes[1], chan_id_bc).has_pending_claim_monitor_events(), + "Expected the B<->C monitor to have a pending preimage HTLCEvent", + ); + + // The ChannelManager should have an in-flight PaymentPreimage update on the A<->B channel. + assert!( + nodes[1].node.has_in_flight_preimage_update(&node_a_id, &chan_id_ab), + "Expected an in-flight preimage monitor update on A<->B", + ); + + let cs_final_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, node_b_id); + nodes[1].node.handle_revoke_and_ack(node_c_id, &cs_final_raa); + check_added_monitors(&nodes[1], 1); + + // Reload node B. The A<->B preimage update is still in-progress, and the B<->C monitor + // event hasn't been acked. + let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode(); + let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode(); + let manager_b = nodes[1].node.encode(); + reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, chain_mon, node_b_reload); + // Without setting this, nodes[1] may or may not generate a redundant preimage monitor update + // depending on what order the monitor events are provided in after restart. 
+ nodes[1] + .chain_monitor + .deterministic_monitor_events + .store(true, core::sync::atomic::Ordering::Release); + + if close_during_reload { + // Test that we still free the B<->C channel if the A<->B channel closed while we + // reloaded (as learned about during the on-reload block connection). + let msg = "Channel force-closed".to_owned(); + let reason = ClosureReason::HolderForceClosed { + broadcasted_latest_txn: Some(true), + message: msg.clone(), + }; + nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &node_b_id, msg).unwrap(); + check_added_monitors(&nodes[0], 1); + check_closed_broadcast(&nodes[0], 1, true); + check_closed_event(&nodes[0], 1, reason, &[node_b_id], 100_000); + let as_closing_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(as_closing_tx.len(), 1); + mine_transaction_without_consistency_checks(&nodes[1], &as_closing_tx[0]); + } + + // On reload, the A<->B preimage update should be detected as completed (the monitor has it + // applied). Processing events should re-provide the B<->C monitor's HTLCEvent, trigger the + // claim, and generate the PaymentForwarded event. + let mut events = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(events.len(), if close_during_reload { 2 } else { 1 }); + expect_payment_forwarded( + events.remove(0), + &nodes[1], + &nodes[0], + &nodes[2], + Some(1000), + None, + close_during_reload, + false, + false, + ); + if close_during_reload { + check_added_monitors(&nodes[1], 1); // force close + + match events[0] { + Event::ChannelClosed { .. } => {}, + _ => panic!("Expected ChannelClosed event"), + } + check_closed_broadcast(&nodes[1], 1, false); + } + + // After processing the PaymentForwarded event, the B<->C monitor event should be acked. 
+ assert!( + !get_monitor!(nodes[1], chan_id_bc).has_pending_claim_monitor_events(), + "B<->C monitor should no longer have pending preimage HTLCEvent after event processing", + ); + + if !close_during_reload { + // Reconnect and verify channels still work. B has a pending fulfill for A since the + // A<->B preimage update completed during reload. + nodes[0].node.peer_disconnected(node_b_id); + let mut reconnect_ab = ReconnectArgs::new(&nodes[0], &nodes[1]); + reconnect_ab.pending_htlc_claims = (1, 0); + reconnect_nodes(reconnect_ab); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); + + nodes[2].node.peer_disconnected(node_b_id); + reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[2])); + send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000); + } else { + // Verify B<->C still operates fine after A<->B was closed. + nodes[2].node.peer_disconnected(node_b_id); + reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[2])); + send_payment(&nodes[1], &[&nodes[2]], 100_000); + } +} + fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { // Test that if a peer manages to send an `update_fulfill_htlc` message without a // `commitment_signed`, disconnects, then replays the `update_fulfill_htlc` message it doesn't @@ -4214,6 +4609,66 @@ fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { if !hold_chan_a { expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], Some(1000), false, false); send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000); + } else if nodes[1].node.test_persistent_monitor_events_enabled() { + assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + let (route, payment_hash_2, payment_preimage_2, payment_secret_2) = + get_route_and_payment_hash!(&nodes[1], nodes[2], 1_000_000); + + // With persistent monitor events, the duplicate claim from the HTLCEvent was already + // processed during reconnect, so the B<->C channel is no longer blocked 
and the + // second payment goes through immediately. + let onion_2 = RecipientOnionFields::secret_only(payment_secret_2, 1_000_000); + let id_2 = PaymentId(payment_hash_2.0); + nodes[1].node.send_payment_with_route(route, payment_hash_2, onion_2, id_2).unwrap(); + check_added_monitors(&nodes[1], 1); + + assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); + let msg_events = nodes[1].node.get_and_clear_pending_msg_events(); + assert_eq!(msg_events.len(), 1); + assert!( + matches!(&msg_events[0], MessageSendEvent::UpdateHTLCs { node_id, .. } if *node_id == node_c_id) + ); + let c_update = msg_events.into_iter().next().unwrap(); + + // Complete the A<->B channel preimage persistence, which sends the fulfill to A. + let (ab_update_id, _) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab); + assert!(nodes[1] + .chain_monitor + .chain_monitor + .channel_monitor_updated(chan_id_ab, ab_update_id) + .is_ok()); + + let msg_events = nodes[1].node.get_and_clear_pending_msg_events(); + assert_eq!(msg_events.len(), 1); + check_added_monitors(&nodes[1], 0); + let a_update = match &msg_events[0] { + MessageSendEvent::UpdateHTLCs { node_id, updates, .. 
} => { + assert_eq!(*node_id, node_a_id); + updates.clone() + }, + _ => panic!("Unexpected event"), + }; + + let update_fulfill = a_update.update_fulfill_htlcs[0].clone(); + nodes[0].node.handle_update_fulfill_htlc(node_b_id, update_fulfill); + let commitment = &a_update.commitment_signed; + do_commitment_signed_dance(&nodes[0], &nodes[1], commitment, false, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); + expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], Some(1000), false, false); + + pass_along_path( + &nodes[1], + &[&nodes[2]], + 1_000_000, + payment_hash_2, + Some(payment_secret_2), + c_update, + true, + None, + ); + claim_payment(&nodes[1], &[&nodes[2]], payment_preimage_2); } else { assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); @@ -4539,9 +4994,17 @@ fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() { // This tests that behavior. let chanmon_cfgs = create_chanmon_cfgs(3); let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); - let legacy_cfg = test_legacy_channel_config(); - let node_chanmgrs = - create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg.clone()), Some(legacy_cfg), None]); + let mut legacy_cfg = test_legacy_channel_config(); + let mut cfg = test_default_channel_config(); + // If persistent_monitor_events is set, there is no need to block RAAs. 
+ legacy_cfg.override_persistent_monitor_events = Some(false); + cfg.override_persistent_monitor_events = Some(false); + + let node_chanmgrs = create_node_chanmgrs( + 3, + &node_cfgs, + &[Some(legacy_cfg.clone()), Some(legacy_cfg), Some(cfg)], + ); let nodes = create_network(3, &node_cfgs, &node_chanmgrs); let node_a_id = nodes[0].node.get_our_node_id(); @@ -4955,6 +5418,7 @@ fn native_async_persist() { nodes[0].chain_monitor.monitor_updates.lock().unwrap().remove(&chan_id).unwrap(); updates = mon_updates.into_iter().collect::>(); assert!(updates.len() >= 4, "The test below needs at least four updates"); + let persistent_monitor_events = nodes[0].node.test_persistent_monitor_events_enabled(); core::mem::drop(nodes); core::mem::drop(node_chanmgrs); @@ -5091,7 +5555,16 @@ fn native_async_persist() { assert_eq!(update_status, ChannelMonitorUpdateStatus::InProgress); persist_futures.poll_futures(); - assert_eq!(async_chain_monitor.release_pending_monitor_events().len(), 0); + let events = async_chain_monitor.release_pending_monitor_events(); + if persistent_monitor_events { + // With persistent monitor events, the LatestHolderCommitmentTXInfo update containing + // claimed_htlcs generates an HTLCEvent with the preimage. + assert_eq!(events.len(), 1); + assert_eq!(events[0].2.len(), 1); + assert!(matches!(events[0].2[0].1, MonitorEvent::HTLCEvent(..))); + } else { + assert!(events.is_empty(), "Expected 0 events, got {events:?}"); + } let pending_writes = kv_store.list_pending_async_writes( CHANNEL_MONITOR_UPDATE_PERSISTENCE_PRIMARY_NAMESPACE, diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index e2e70eef8d9..deaa9bbd0ef 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1621,6 +1621,20 @@ pub(crate) enum EventCompletionAction { AckMonitorEvent { event_source: MonitorEventSource }, } +impl EventCompletionAction { + /// If this action has a `monitor_event_id` field, set it to the given value. 
+ /// This is used to attach a persistent monitor event ack to an existing completion + /// action, so the event gets acked when the user processes the corresponding Event. + fn insert_monitor_event_id(&mut self, id: u64) { + match self { + EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(update) => { + update.monitor_event_id = Some(id); + }, + _ => {}, + } + } +} + impl_writeable_tlv_based_enum!(EventCompletionAction, (0, ReleaseRAAChannelMonitorUpdate) => { (0, channel_funding_outpoint, option), @@ -9239,6 +9253,26 @@ impl< let push_forward_htlcs_failure = |prev_outbound_scid_alias: u64, failure: HTLCForwardInfo| { + // If the HTLC is still pending decoding (e.g. because the ChannelManager + // was persisted before the HTLC was forwarded but the ChannelMonitor was + // not), remove it to avoid a duplicate forward or intercept attempt. + if self.persistent_monitor_events { + let htlc_id = match &failure { + HTLCForwardInfo::FailHTLC { htlc_id, .. } + | HTLCForwardInfo::FailMalformedHTLC { htlc_id, .. } => Some(*htlc_id), + _ => None, + }; + if let Some(htlc_id) = htlc_id { + let mut decode_htlcs = self.decode_update_add_htlcs.lock().unwrap(); + if let Some(htlcs) = decode_htlcs.get_mut(&prev_outbound_scid_alias) { + htlcs.retain(|htlc| htlc.htlc_id != htlc_id); + if htlcs.is_empty() { + decode_htlcs.remove(&prev_outbound_scid_alias); + } + } + } + } + let mut forward_htlcs = self.forward_htlcs.lock().unwrap(); match forward_htlcs.entry(prev_outbound_scid_alias) { hash_map::Entry::Occupied(mut entry) => { @@ -9737,6 +9771,22 @@ impl< } (None, None) } else if definitely_duplicate { + if self.persistent_monitor_events { + // If persistent_monitor_events is enabled and this is a duplicate claim, we need only + // ack the monitor event corresponding to the claim; an event for the payment forward + // was already generated. 
+ if let Some(event_src) = monitor_event_source { + return ( + Some(MonitorUpdateCompletionAction::AckMonitorEvents { + monitor_events_to_ack: vec![event_src], + }), + None, + ); + } else { + // We may hit this case if a peer replayed an update_fulfill_htlc after a reconnect. + return (None, None); + } + } ( Some(MonitorUpdateCompletionAction::FreeDuplicateClaimImmediately { downstream_counterparty_node_id: chan_to_release.counterparty_node_id, @@ -9974,9 +10024,17 @@ impl< } else if matches!( action, MonitorUpdateCompletionAction::PaymentClaimed { .. } + | MonitorUpdateCompletionAction::AckMonitorEvents { .. } ) { - debug_assert!(during_init, - "Duplicate claims should always either be for forwarded payments(freeing another channel immediately) or during init (for claim replay)"); + // If persistent_monitor_events is enabled, we always claim immediately upon + // receiving an update_fulfill_htlc from the outbound edge and then also when we + // hear from the outbound edge ChannelMonitor via MonitorEvent that the preimage + // was included in the latest commitment, so duplicate claims from the monitor + // event are expected here. + if !self.persistent_monitor_events { + debug_assert!(during_init, + "Duplicate claims should always either be for forwarded payments(freeing another channel immediately) or during init (for claim replay)"); + } mem::drop(per_peer_state); self.handle_monitor_update_completion_actions([action]); } else { @@ -10129,6 +10187,10 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ monitor_event_id, }; Some(EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(release)) + } else if let Some(event_id) = monitor_event_id { + Some(EventCompletionAction::AckMonitorEvent { + event_source: MonitorEventSource { channel_id: next_channel_id, event_id }, + }) } else { Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { channel_funding_outpoint: Some(next_channel_outpoint), @@ -10156,15 +10218,31 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ ); // If an event was generated, `claim_htlc` set `ev_completion_action` to None, if // not, we should go ahead and run it now (as the claim was duplicative), at least - // if a PaymentClaimed event with the same action isn't already pending. - let have_action = if ev_completion_action.is_some() { - let pending_events = self.pending_events.lock().unwrap(); - pending_events.iter().any(|(_, act)| *act == ev_completion_action) - } else { - false - }; - if !have_action { - self.handle_post_event_actions(ev_completion_action); + // if a PaymentSent event with the same action isn't already pending. + if let Some(action) = ev_completion_action { + let mut pending_events = self.pending_events.lock().unwrap(); + // If there's a pending PaymentSent for this payment, attach the + // monitor_event_id to its existing action so the persistent monitor + // event gets acked when the user processes PaymentSent. + let handled = pending_events.iter_mut().any(|(ev, act)| { + if let events::Event::PaymentSent { payment_id: Some(ev_id), .. 
} = ev { + if *ev_id == payment_id { + if let Some(ref mut existing) = act { + if let EventCompletionAction::AckMonitorEvent { event_source } = + &action + { + existing.insert_monitor_event_id(event_source.event_id); + } + return true; + } + } + } + act.as_ref() == Some(&action) + }); + if !handled { + mem::drop(pending_events); + self.handle_post_event_actions(Some(action)); + } } }, HTLCSource::PreviousHopData(hop_data) => { @@ -12723,23 +12801,32 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ chan.update_fulfill_htlc(&msg), chan_entry ); - let prev_hops = match &res.0 { - HTLCSource::PreviousHopData(prev_hop) => vec![prev_hop], - HTLCSource::TrampolineForward { previous_hop_data, .. } => { - previous_hop_data.iter().collect() - }, - _ => vec![], - }; - let logger = WithChannelContext::from(&self.logger, &chan.context, None); - for prev_hop in prev_hops { - log_trace!(logger, - "Holding the next revoke_and_ack until the preimage is durably persisted in the inbound edge's ChannelMonitor", - ); - peer_state - .actions_blocking_raa_monitor_updates - .entry(msg.channel_id) - .or_insert_with(Vec::new) - .push(RAAMonitorUpdateBlockingAction::from_prev_hop_data(prev_hop)); + // If persistent_monitor_events is enabled, there is never a need to block an outbound + // edge's RAA monitor update, because we rely on the outbound edge's monitor event + // containing the payment preimage to continue being re-provided to us until the + // preimage is durably in the inbound edge. + if !self.persistent_monitor_events { + let prev_hops = match &res.0 { + HTLCSource::PreviousHopData(prev_hop) => vec![prev_hop], + HTLCSource::TrampolineForward { previous_hop_data, .. 
} => { + previous_hop_data.iter().collect() + }, + _ => vec![], + }; + let logger = + WithChannelContext::from(&self.logger, &chan.context, None); + for prev_hop in prev_hops { + log_trace!(logger, + "Holding the next revoke_and_ack until the preimage is durably persisted in the inbound edge's ChannelMonitor", + ); + peer_state + .actions_blocking_raa_monitor_updates + .entry(msg.channel_id) + .or_insert_with(Vec::new) + .push(RAAMonitorUpdateBlockingAction::from_prev_hop_data( + prev_hop, + )); + } } // Note that we do not need to push an `actions_blocking_raa_monitor_updates` @@ -13019,6 +13106,31 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ }) } + #[cfg(test)] + pub(crate) fn test_persistent_monitor_events_enabled(&self) -> bool { + self.persistent_monitor_events + } + + /// Returns true if there is an in-flight `ChannelMonitorUpdate` containing a + /// `PaymentPreimage` step for the given channel. + #[cfg(test)] + pub(crate) fn has_in_flight_preimage_update( + &self, counterparty_node_id: &PublicKey, channel_id: &ChannelId, + ) -> bool { + let per_peer_state = self.per_peer_state.read().unwrap(); + if let Some(peer_state_mtx) = per_peer_state.get(counterparty_node_id) { + let peer_state = peer_state_mtx.lock().unwrap(); + if let Some((_, updates)) = peer_state.in_flight_monitor_updates.get(channel_id) { + return updates.iter().any(|u| { + u.updates.iter().any(|step| { + matches!(step, crate::chain::channelmonitor::ChannelMonitorUpdateStep::PaymentPreimage { .. }) + }) + }); + } + } + false + } + #[cfg(any(test, feature = "_test_utils"))] pub(crate) fn test_raa_monitor_updates_held( &self, counterparty_node_id: PublicKey, channel_id: ChannelId, @@ -13717,6 +13829,18 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ Some(htlc_update.payment_hash), ); if let Some(preimage) = htlc_update.payment_preimage { + let from_onchain = self + .per_peer_state + .read() + .unwrap() + .get(&counterparty_node_id) + .map_or(true, |peer_state_mtx| { + !peer_state_mtx + .lock() + .unwrap() + .channel_by_id + .contains_key(&channel_id) + }); log_trace!( logger, "Claiming HTLC with preimage {} from our monitor", @@ -13729,7 +13853,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), htlc_update.skimmed_fee_msat, - true, + from_onchain, counterparty_node_id, funding_outpoint, channel_id, @@ -19922,7 +20046,7 @@ impl< } } - if is_channel_closed { + if is_channel_closed && !persistent_monitor_events { for (htlc_source, (htlc, preimage_opt)) in monitor.get_all_current_outbound_htlcs() { diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 81fb99819d3..f3aa09f6d21 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -7384,6 +7384,7 @@ pub fn test_update_err_monitor_lockdown() { ) .unwrap() .1; + new_monitor.copy_monitor_event_state(&monitor); assert!(new_monitor == *monitor); new_monitor }; @@ -7492,6 +7493,7 @@ pub fn test_concurrent_monitor_claim() { ) .unwrap() .1; + new_monitor.copy_monitor_event_state(&monitor); assert!(new_monitor == *monitor); new_monitor }; @@ -7542,6 +7544,7 @@ pub fn test_concurrent_monitor_claim() { ) .unwrap() .1; + new_monitor.copy_monitor_event_state(&monitor); assert!(new_monitor == *monitor); new_monitor }; @@ -7875,6 +7878,14 @@ fn do_test_onchain_htlc_settlement_after_close( check_added_monitors(&nodes[1], 2); let events = nodes[1].node.get_and_clear_pending_msg_events(); + if go_onchain_before_fulfill && nodes[1].node.test_persistent_monitor_events_enabled() { + // If persistent monitor events are enabled, Bob processed a monitor event HTLC 
update + // resulting in a redundant preimage monitor update and forward event here. + check_added_monitors(&nodes[1], 1); + let events = nodes[1].node.get_and_clear_pending_msg_events(); + assert!(events.is_empty()); + expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], fee, went_onchain, false); + } assert_eq!(events.len(), 2); let bob_revocation = match events[0] { MessageSendEvent::SendRevokeAndACK { ref node_id, ref msg } => { @@ -7897,6 +7908,17 @@ fn do_test_onchain_htlc_settlement_after_close( check_added_monitors(&nodes[2], 1); let events = nodes[2].node.get_and_clear_pending_msg_events(); + if !go_onchain_before_fulfill + && !broadcast_alice + && nodes[1].node.test_persistent_monitor_events_enabled() + { + // If persistent monitor events are enabled, Bob processed a monitor event HTLC update + // resulting in a redundant preimage monitor update and forward event here. + check_added_monitors(&nodes[1], 1); + let events = nodes[1].node.get_and_clear_pending_msg_events(); + assert!(events.is_empty()); + expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], fee, went_onchain, false); + } assert_eq!(events.len(), 1); let carol_revocation = match events[0] { MessageSendEvent::SendRevokeAndACK { ref node_id, ref msg } => { diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 4c97df2f42e..fe1e270d081 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3411,6 +3411,8 @@ fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool, p2a_anchor: bo let mut cfg = test_default_channel_config(); cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; cfg.channel_handshake_config.negotiate_anchor_zero_fee_commitments = p2a_anchor; + // This test no longer applies when MonitorEvents become persistent. 
+ cfg.override_persistent_monitor_events = Some(false); let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; let chanmon_cfgs = create_chanmon_cfgs(3); diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index 6d8b727f81a..5a8f5fafe03 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -775,7 +775,7 @@ fn do_retry_with_no_persist(confirm_before_reload: bool) { // always persisted asynchronously), the ChannelManager has to reload some payment data from // ChannelMonitor(s) in some cases. This tests that reloading. // - // `confirm_before_reload` confirms the channel-closing commitment transaction on-chain prior + // `confirm_before_reload` confirms the A<>B channel-closing commitment transaction on-chain prior // to reloading the ChannelManager, increasing test coverage in ChannelMonitor HTLC tracking // which has separate codepaths for "commitment transaction already confirmed" and not. let chanmon_cfgs = create_chanmon_cfgs(3); @@ -920,8 +920,64 @@ fn do_retry_with_no_persist(confirm_before_reload: bool) { let fulfill_msg = htlc_fulfill.update_fulfill_htlcs.remove(0); nodes[1].node.handle_update_fulfill_htlc(node_c_id, fulfill_msg); check_added_monitors(&nodes[1], 1); - do_commitment_signed_dance(&nodes[1], &nodes[2], &htlc_fulfill.commitment_signed, false, false); - expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], None, true, false); + if nodes[1].node.test_persistent_monitor_events_enabled() { + // With persistent monitor events, the B<->C monitor's preimage HTLCEvent triggers a + // redundant claim on the closed A<->B channel during the commitment dance. Unroll the + // dance to account for the extra PaymentPreimage monitor update. 
+ nodes[1] + .node + .handle_commitment_signed_batch_test(node_c_id, &htlc_fulfill.commitment_signed); + check_added_monitors(&nodes[1], 1); + let (bs_raa, bs_cs) = get_revoke_commit_msgs(&nodes[1], &node_c_id); + // get_revoke_commit_msgs triggered process_pending_monitor_events which processed + // the HTLCEvent, generating the extra preimage update on A<->B. + check_added_monitors(&nodes[1], 1); + nodes[2].node.handle_revoke_and_ack(node_b_id, &bs_raa); + check_added_monitors(&nodes[2], 1); + nodes[2].node.handle_commitment_signed_batch_test(node_b_id, &bs_cs); + check_added_monitors(&nodes[2], 1); + let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, node_b_id); + nodes[1].node.handle_revoke_and_ack(node_c_id, &cs_raa); + check_added_monitors(&nodes[1], 1); + } else { + do_commitment_signed_dance( + &nodes[1], + &nodes[2], + &htlc_fulfill.commitment_signed, + false, + false, + ); + } + let mut events = nodes[1].node.get_and_clear_pending_events(); + expect_payment_forwarded( + events.remove(0), + &nodes[1], + &nodes[0], + &nodes[2], + None, + None, + true, + false, + false, + ); + if nodes[1].node.test_persistent_monitor_events_enabled() { + // With persistent monitor events, the outbound monitor's preimage HTLCEvent triggers a + // duplicate claim on the closed inbound edge, generating a second PaymentForwarded event. + assert_eq!(events.len(), 1); + expect_payment_forwarded( + events.remove(0), + &nodes[1], + &nodes[0], + &nodes[2], + None, + None, + true, + false, + false, + ); + } else { + assert_eq!(events.len(), 0); + } if confirm_before_reload { let best_block = nodes[0].blocks.lock().unwrap().last().unwrap().clone(); @@ -1406,7 +1462,15 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload( // handled, but while processing the pending `MonitorEvent`s (which were not processed // before the monitor was persisted) we will end up with a duplicate // ChannelMonitorUpdate. 
- check_added_monitors(&nodes[0], 2); + if nodes[0].node.test_persistent_monitor_events_enabled() { + // If persistent monitor events are enabled, we do not reconstruct pending closed channel + // HTLCs based on polling the monitors and instead reconstruct based on MonitorEvents that + // were still un-acked on shutdown. In this case, the monitor event for the payment was + // ack'ed so we avoid the duplicate monitor update. + check_added_monitors(&nodes[0], 1); + } else { + check_added_monitors(&nodes[0], 2); + } } else { // ...unless we got the PaymentSent event, in which case we have de-duplication logic // preventing a redundant monitor event. diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs index f4eb6287921..35011bae4c8 100644 --- a/lightning/src/util/test_utils.rs +++ b/lightning/src/util/test_utils.rs @@ -521,6 +521,9 @@ pub struct TestChainMonitor<'a> { /// deferred operations. This allows tests to control exactly when queued monitor updates /// are applied to the in-memory monitor. pub pause_flush: AtomicBool, + /// When set to `true`, `release_pending_monitor_events` sorts events by `ChannelId` to + /// ensure deterministic processing order regardless of HashMap iteration order. + pub deterministic_monitor_events: AtomicBool, /// Buffer of the last 20 monitor updates, most recent first. 
pub recent_monitor_updates: Mutex>, } @@ -583,6 +586,7 @@ impl<'a> TestChainMonitor<'a> { #[cfg(feature = "std")] write_blocker: Mutex::new(None), pause_flush: AtomicBool::new(false), + deterministic_monitor_events: AtomicBool::new(false), recent_monitor_updates: Mutex::new(Vec::new()), } } @@ -745,7 +749,11 @@ impl<'a> chain::Watch for TestChainMonitor<'a> { let count = self.chain_monitor.pending_operation_count(); self.chain_monitor.flush(count, &self.logger); } - return self.chain_monitor.release_pending_monitor_events(); + let mut events = self.chain_monitor.release_pending_monitor_events(); + if self.deterministic_monitor_events.load(Ordering::Acquire) { + events.sort_by_key(|(_, channel_id, _, _)| *channel_id); + } + events } fn ack_monitor_event(&self, source: MonitorEventSource) { From 01fab23623ba1b48f478c0023f633b1d2fa3fecc Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Mon, 30 Mar 2026 15:00:24 -0400 Subject: [PATCH 28/28] Update completion for persistent mon events w/ fwd event If persistent_monitor_events is enabled, for forwarded HTLC claims there is no need to block the outbound edge's RAA monitor update until the preimage makes it into the inbound edge -- the outbound edge's preimage monitor event is persistent and will keep being returned to us until the inbound edge gets the preimage, instead. Therefore, clean up the claim flow when persistent monitor events is enabled to omit any usage of/reference to RAA monitor update blocking actions. --- lightning/src/ln/channelmanager.rs | 78 +++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index deaa9bbd0ef..11dea8cf4a9 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1512,6 +1512,17 @@ pub(crate) enum MonitorUpdateCompletionAction { /// action is handled. 
monitor_event_source: Option, }, + /// Indicates an [`events::Event`] should be surfaced to the user once the associated + /// [`ChannelMonitorUpdate`] has been durably persisted. + /// + /// This is used when `persistent_monitor_events` is enabled for forwarded payment claims. + /// Unlike [`Self::EmitEventOptionAndFreeOtherChannel`], this variant does not carry any + /// channel-unblocking information because persistent monitor events never add + /// [`RAAMonitorUpdateBlockingAction::ForwardedPaymentInboundClaim`] blockers. + EmitEventAndAckMonitorEvent { + event: events::Event, + monitor_event_source: Option, + }, /// Indicates that one or more [`MonitorEvent`]s should be acknowledged via /// [`chain::Watch::ack_monitor_event`] once the associated [`ChannelMonitorUpdate`] has been /// durably persisted. @@ -1542,6 +1553,10 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, (3, AckMonitorEvents) => { (0, monitor_events_to_ack, required_vec), }, + (4, EmitEventAndAckMonitorEvent) => { + (0, event, upgradable_required), + (1, monitor_event_source, option), + }, ); /// Result of attempting to resume a channel after a monitor update completes while locks are held. @@ -9798,20 +9813,48 @@ impl< ) } else { let event = make_payment_forwarded_event(htlc_claim_value_msat); - if let Some(ref payment_forwarded) = event { - debug_assert!(matches!( - payment_forwarded, - &events::Event::PaymentForwarded { .. } - )); + if self.persistent_monitor_events { + match (event, monitor_event_source) { + // Typically monitor_event_source is None here. The usual flow is: receive + // update_fulfill_htlc from outbound edge peer, call claim_funds with + // monitor_event_source = None, generate event here. Then, preimage gets into + // outbound edge monitor, get a preimage monitor event, call claim_funds again and + // hit the duplicate claim flow above. 
However, in the case that the ChannelManager + // is outdated and we got here after restart from an un-acked preimage monitor event, + // we can end up here with monitor event source = Some. + (Some(ev), mes) => ( + Some(MonitorUpdateCompletionAction::EmitEventAndAckMonitorEvent { + event: ev, + monitor_event_source: mes, + }), + None, + ), + (None, Some(mes)) => ( + Some(MonitorUpdateCompletionAction::AckMonitorEvents { + monitor_events_to_ack: vec![mes], + }), + None, + ), + (None, None) => (None, None), + } + } else { + if let Some(ref payment_forwarded) = event { + debug_assert!(matches!( + payment_forwarded, + &events::Event::PaymentForwarded { .. } + )); + } + ( + Some( + MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel { + event, + downstream_counterparty_and_funding_outpoint: chan_to_release, + monitor_event_source, + }, + ), + None, + ) } - ( - Some(MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel { - event, - downstream_counterparty_and_funding_outpoint: chan_to_release, - monitor_event_source, - }), - None, - ) } }, ); @@ -10601,6 +10644,15 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ Some(blocking_action), ); }, + MonitorUpdateCompletionAction::EmitEventAndAckMonitorEvent { + event, + monitor_event_source, + } => { + if let Some(source) = monitor_event_source { + self.chain_monitor.ack_monitor_event(source); + } + self.pending_events.lock().unwrap().push_back((event, None)); + }, MonitorUpdateCompletionAction::AckMonitorEvents { monitor_events_to_ack } => { for source in monitor_events_to_ack { self.chain_monitor.ack_monitor_event(source);