Skip to content

Commit 67ad6c4

Browse files
committed
Don't remove nodes if there's no channel_update for a temp failure
Previously, we were requiring any `UPDATE` onion errors to include a `channel_update`, as the spec mandates[1]. If we see an onion error which is missing one we treat it as a misbehaving node that isn't behaving according to the spec and simply remove the node. Sadly, it appears at least some versions of CLN are such nodes, and opt to not include `channel_update` at all if they're returning a `temporary_channel_failure`. This causes us to completely remove CLN nodes from our graph after they fail to forward our HTLC. While CLN is violating the spec here, there's not a lot of reason to not allow it, so we go ahead and do so here, treating it simply as any other failure by letting the scorer handle it. [1] The spec says `Please note that the channel_update field is mandatory in messages whose failure_code includes the UPDATE flag` however doesn't repeat it in the requirements section so its not crazy that someone missed it when implementing.
1 parent 3dcd490 commit 67ad6c4

File tree

3 files changed

+59
-50
lines changed

3 files changed

+59
-50
lines changed

fuzz/src/router.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ pub fn do_test<Out: test_logger::Output>(data: &[u8], out: Out) {
227227
},
228228
4 => {
229229
let short_channel_id = slice_to_be64(get_slice!(8));
230-
net_graph.channel_failed(short_channel_id, false);
230+
net_graph.channel_failed_permanent(short_channel_id);
231231
},
232232
_ if node_pks.is_empty() => {},
233233
_ => {

lightning/src/ln/onion_utils.rs

+36-15
Original file line numberDiff line numberDiff line change
@@ -493,21 +493,28 @@ pub(super) fn process_onion_failure<T: secp256k1::Signing, L: Deref>(secp_ctx: &
493493
} else {
494494
log_trace!(logger, "Failure provided features a channel update without type prefix. Deprecated, but allowing for now.");
495495
}
496-
if let Ok(chan_update) = msgs::ChannelUpdate::read(&mut Cursor::new(&update_slice)) {
496+
let update_opt = msgs::ChannelUpdate::read(&mut Cursor::new(&update_slice));
497+
if update_opt.is_ok() || update_slice.is_empty() {
497498
// if channel_update should NOT have caused the failure:
498499
// MAY treat the channel_update as invalid.
499500
let is_chan_update_invalid = match error_code & 0xff {
500501
7 => false,
501-
11 => amt_to_forward > chan_update.contents.htlc_minimum_msat,
502-
12 => amt_to_forward
503-
.checked_mul(chan_update.contents.fee_proportional_millionths as u64)
502+
11 => update_opt.is_ok() &&
503+
amt_to_forward >
504+
update_opt.as_ref().unwrap().contents.htlc_minimum_msat,
505+
12 => update_opt.is_ok() && amt_to_forward
506+
.checked_mul(update_opt.as_ref().unwrap()
507+
.contents.fee_proportional_millionths as u64)
504508
.map(|prop_fee| prop_fee / 1_000_000)
505-
.and_then(|prop_fee| prop_fee.checked_add(chan_update.contents.fee_base_msat as u64))
509+
.and_then(|prop_fee| prop_fee.checked_add(
510+
update_opt.as_ref().unwrap().contents.fee_base_msat as u64))
506511
.map(|fee_msats| route_hop.fee_msat >= fee_msats)
507512
.unwrap_or(false),
508-
13 => route_hop.cltv_expiry_delta as u16 >= chan_update.contents.cltv_expiry_delta,
513+
13 => update_opt.is_ok() &&
514+
route_hop.cltv_expiry_delta as u16 >=
515+
update_opt.as_ref().unwrap().contents.cltv_expiry_delta,
509516
14 => false, // expiry_too_soon; always valid?
510-
20 => chan_update.contents.flags & 2 == 0,
517+
20 => update_opt.as_ref().unwrap().contents.flags & 2 == 0,
511518
_ => false, // unknown error code; take channel_update as valid
512519
};
513520
if is_chan_update_invalid {
@@ -518,17 +525,31 @@ pub(super) fn process_onion_failure<T: secp256k1::Signing, L: Deref>(secp_ctx: &
518525
is_permanent: true,
519526
});
520527
} else {
521-
// Make sure the ChannelUpdate contains the expected
522-
// short channel id.
523-
if failing_route_hop.short_channel_id == chan_update.contents.short_channel_id {
524-
short_channel_id = Some(failing_route_hop.short_channel_id);
528+
if let Ok(chan_update) = update_opt {
529+
// Make sure the ChannelUpdate contains the expected
530+
// short channel id.
531+
if failing_route_hop.short_channel_id == chan_update.contents.short_channel_id {
532+
short_channel_id = Some(failing_route_hop.short_channel_id);
533+
} else {
534+
log_info!(logger, "Node provided a channel_update for which it was not authoritative, ignoring.");
535+
}
536+
network_update = Some(NetworkUpdate::ChannelUpdateMessage {
537+
msg: chan_update,
538+
})
525539
} else {
526-
log_info!(logger, "Node provided a channel_update for which it was not authoritative, ignoring.");
540+
network_update = Some(NetworkUpdate::ChannelFailure {
541+
short_channel_id: route_hop.short_channel_id,
542+
is_permanent: false,
543+
});
527544
}
528-
network_update = Some(NetworkUpdate::ChannelUpdateMessage {
529-
msg: chan_update,
530-
})
531545
};
546+
} else {
547+
// If the channel_update had a non-zero length (i.e. was
548+
// present) but we couldn't read it, treat it as a total
549+
// node failure.
550+
log_info!(logger,
551+
"Failed to read a channel_update of len {} in an onion",
552+
update_slice.len());
532553
}
533554
}
534555
}

lightning/src/routing/gossip.rs

+22-34
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ pub enum NetworkUpdate {
212212
msg: ChannelUpdate,
213213
},
214214
/// An error indicating that a channel failed to route a payment, which should be applied via
215-
/// [`NetworkGraph::channel_failed`].
215+
/// [`NetworkGraph::channel_failed_permanent`] if permanent.
216216
ChannelFailure {
217217
/// The short channel id of the closed channel.
218218
short_channel_id: u64,
@@ -352,9 +352,10 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
352352
let _ = self.update_channel(msg);
353353
},
354354
NetworkUpdate::ChannelFailure { short_channel_id, is_permanent } => {
355-
let action = if is_permanent { "Removing" } else { "Disabling" };
356-
log_debug!(self.logger, "{} channel graph entry for {} due to a payment failure.", action, short_channel_id);
357-
self.channel_failed(short_channel_id, is_permanent);
355+
if is_permanent {
356+
log_debug!(self.logger, "Removing channel graph entry for {} due to a payment failure.", short_channel_id);
357+
self.channel_failed_permanent(short_channel_id);
358+
}
358359
},
359360
NetworkUpdate::NodeFailure { ref node_id, is_permanent } => {
360361
if is_permanent {
@@ -1632,40 +1633,27 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
16321633
Ok(())
16331634
}
16341635

1635-
/// Marks a channel in the graph as failed if a corresponding HTLC fail was sent.
1636-
/// If permanent, removes a channel from the local storage.
1637-
/// May cause the removal of nodes too, if this was their last channel.
1638-
/// If not permanent, makes channels unavailable for routing.
1639-
pub fn channel_failed(&self, short_channel_id: u64, is_permanent: bool) {
1636+
/// Marks a channel in the graph as failed permanently.
1637+
///
1638+
/// The channel and any node for which this was their last channel are removed from the graph.
1639+
pub fn channel_failed_permanent(&self, short_channel_id: u64) {
16401640
#[cfg(feature = "std")]
16411641
let current_time_unix = Some(SystemTime::now().duration_since(UNIX_EPOCH).expect("Time must be > 1970").as_secs());
16421642
#[cfg(not(feature = "std"))]
16431643
let current_time_unix = None;
16441644

1645-
self.channel_failed_with_time(short_channel_id, is_permanent, current_time_unix)
1645+
self.channel_failed_permanent_with_time(short_channel_id, current_time_unix)
16461646
}
16471647

1648-
/// Marks a channel in the graph as failed if a corresponding HTLC fail was sent.
1649-
/// If permanent, removes a channel from the local storage.
1650-
/// May cause the removal of nodes too, if this was their last channel.
1651-
/// If not permanent, makes channels unavailable for routing.
1652-
fn channel_failed_with_time(&self, short_channel_id: u64, is_permanent: bool, current_time_unix: Option<u64>) {
1648+
/// Marks a channel in the graph as failed permanently.
1649+
///
1650+
/// The channel and any node for which this was their last channel are removed from the graph.
1651+
fn channel_failed_permanent_with_time(&self, short_channel_id: u64, current_time_unix: Option<u64>) {
16531652
let mut channels = self.channels.write().unwrap();
1654-
if is_permanent {
1655-
if let Some(chan) = channels.remove(&short_channel_id) {
1656-
let mut nodes = self.nodes.write().unwrap();
1657-
self.removed_channels.lock().unwrap().insert(short_channel_id, current_time_unix);
1658-
Self::remove_channel_in_nodes(&mut nodes, &chan, short_channel_id);
1659-
}
1660-
} else {
1661-
if let Some(chan) = channels.get_mut(&short_channel_id) {
1662-
if let Some(one_to_two) = chan.one_to_two.as_mut() {
1663-
one_to_two.enabled = false;
1664-
}
1665-
if let Some(two_to_one) = chan.two_to_one.as_mut() {
1666-
two_to_one.enabled = false;
1667-
}
1668-
}
1653+
if let Some(chan) = channels.remove(&short_channel_id) {
1654+
let mut nodes = self.nodes.write().unwrap();
1655+
self.removed_channels.lock().unwrap().insert(short_channel_id, current_time_unix);
1656+
Self::remove_channel_in_nodes(&mut nodes, &chan, short_channel_id);
16691657
}
16701658
}
16711659

@@ -2450,7 +2438,7 @@ pub(crate) mod tests {
24502438
assert!(network_graph.read_only().channels().get(&short_channel_id).unwrap().one_to_two.is_some());
24512439
}
24522440

2453-
// Non-permanent closing just disables a channel
2441+
// Non-permanent failure doesn't touch the channel at all
24542442
{
24552443
match network_graph.read_only().channels().get(&short_channel_id) {
24562444
None => panic!(),
@@ -2467,7 +2455,7 @@ pub(crate) mod tests {
24672455
match network_graph.read_only().channels().get(&short_channel_id) {
24682456
None => panic!(),
24692457
Some(channel_info) => {
2470-
assert!(!channel_info.one_to_two.as_ref().unwrap().enabled);
2458+
assert!(channel_info.one_to_two.as_ref().unwrap().enabled);
24712459
}
24722460
};
24732461
}
@@ -2601,7 +2589,7 @@ pub(crate) mod tests {
26012589

26022590
// Mark the channel as permanently failed. This will also remove the two nodes
26032591
// and all of the entries will be tracked as removed.
2604-
network_graph.channel_failed_with_time(short_channel_id, true, Some(tracking_time));
2592+
network_graph.channel_failed_permanent_with_time(short_channel_id, Some(tracking_time));
26052593

26062594
// Should not remove from tracking if insufficient time has passed
26072595
network_graph.remove_stale_channels_and_tracking_with_time(
@@ -2634,7 +2622,7 @@ pub(crate) mod tests {
26342622

26352623
// Mark the channel as permanently failed. This will also remove the two nodes
26362624
// and all of the entries will be tracked as removed.
2637-
network_graph.channel_failed(short_channel_id, true);
2625+
network_graph.channel_failed_permanent(short_channel_id);
26382626

26392627
// The first time we call the following, the channel will have a removal time assigned.
26402628
network_graph.remove_stale_channels_and_tracking_with_time(removal_time);

0 commit comments

Comments
 (0)