Skip to content

Commit

Permalink
Merge pull request #7520 from bitromortac/2303-bimodal-fixes
Browse files Browse the repository at this point in the history
routing: prevent retrials for failed channels in bimodal pathfinding
  • Loading branch information
guggero authored Mar 23, 2023
2 parents fe24f19 + a3e46da commit 1052b13
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 35 deletions.
3 changes: 2 additions & 1 deletion cmd/lncli/cmd_mission_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ var setCfgCommand = cli.Command{
cli.StringFlag{
Name: "estimator",
Usage: "the probability estimator to use, choose " +
"between 'apriori' or 'bimodal'",
"between 'apriori' or 'bimodal' (bimodal is " +
"experimental)",
},
// Apriori config.
cli.DurationFlag{
Expand Down
4 changes: 4 additions & 0 deletions docs/release-notes/release-notes-0.16.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,10 @@ in the lnwire package](https://github.com/lightningnetwork/lnd/pull/7303)
https://github.com/lightningnetwork/lnd/pull/6815)
* [The a priori capacity factor is made configurable and its effect is
limited.](https://github.com/lightningnetwork/lnd/pull/7444)
* Local edges and hop hints [are extended with a
capacity](https://github.com/lightningnetwork/lnd/pull/7520) to avoid channel
whitelisting in probability calculations. The influence of the node
probability is reduced.

## Configuration
* Note that [this pathfinding change](https://github.com/lightningnetwork/lnd/pull/6815)
Expand Down
10 changes: 5 additions & 5 deletions lnrpc/routerrpc/router.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions lnrpc/routerrpc/router.proto
Original file line number Diff line number Diff line change
Expand Up @@ -513,9 +513,9 @@ message MissionControlConfig {

/*
ProbabilityModel defines which probability estimator should be used in
pathfinding.
pathfinding. Note that the bimodal estimator is experimental.
*/
ProbabilityModel Model = 6;
ProbabilityModel model = 6;

/*
EstimatorConfig is populated dependent on the estimator type.
Expand Down
4 changes: 2 additions & 2 deletions lnrpc/routerrpc/router.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1475,9 +1475,9 @@
"format": "uint64",
"description": "The minimum time that must have passed since the previously recorded failure\nbefore we raise the failure amount."
},
"Model": {
"model": {
"$ref": "#/definitions/MissionControlConfigProbabilityModel",
"description": "ProbabilityModel defines which probability estimator should be used in\npathfinding."
"description": "ProbabilityModel defines which probability estimator should be used in\npathfinding. Note that the bimodal estimator is experimental."
},
"apriori": {
"$ref": "#/definitions/routerrpcAprioriParameters"
Expand Down
16 changes: 15 additions & 1 deletion routing/pathfind.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ const (
// to avoid resizing and copies. It should be a number on the same order as
// the number of active nodes in the network.
estimatedNodeCount = 10000

// fakeHopHintCapacity is the capacity we assume for hop hint channels.
// This is a high number, which expresses that a hop hint channel should
// be able to route payments.
fakeHopHintCapacity = btcutil.Amount(10 * btcutil.SatoshiPerBitcoin)
)

// pathFinder defines the interface of a path finding algorithm.
Expand Down Expand Up @@ -863,8 +868,17 @@ func findPath(g *graphParams, r *RestrictParams, cfg *PathFindingConfig,
return nil, 0, err
}

// We add hop hints that were supplied externally.
for _, reverseEdge := range additionalEdgesWithSrc[pivot] {
u.addPolicy(reverseEdge.sourceNode, reverseEdge.edge, 0)
// Hop hints don't contain a capacity. We set one here,
// since a capacity is needed for probability
// calculations. We set a high capacity to act as if
// there is enough liquidity, otherwise the hint would
// not have been added by a wallet.
u.addPolicy(
reverseEdge.sourceNode, reverseEdge.edge,
fakeHopHintCapacity,
)
}

amtToSend := partialPath.amountToReceive
Expand Down
57 changes: 49 additions & 8 deletions routing/probability_bimodal.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ const (
// the timescale of about a week.
DefaultBimodalDecayTime = 7 * 24 * time.Hour

// BimodalScaleMsatLimit is the maximum value for BimodalScaleMsat to
// avoid numerical issues.
BimodalScaleMsatMax = lnwire.MilliSatoshi(21e17)
// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat. We
// limit it here to the fakeHopHintCapacity to avoid issues with hop
// hint probability calculations.
BimodalScaleMsatMax = lnwire.MilliSatoshi(
1000 * fakeHopHintCapacity / 4,
)

// BimodalEstimatorName is used to identify the bimodal estimator.
BimodalEstimatorName = "bimodal"
Expand All @@ -46,6 +49,10 @@ var (

// ErrInvalidDecayTime is returned when we get a decay time below zero.
ErrInvalidDecayTime = errors.New("decay time must be larger than zero")

// ErrZeroCapacity is returned when we encounter a channel with zero
// capacity in probability estimation.
ErrZeroCapacity = errors.New("capacity must be larger than zero")
)

// BimodalConfig contains configuration for our probability estimator.
Expand Down Expand Up @@ -223,7 +230,9 @@ func (p *BimodalEstimator) directProbability(now time.Time,
capacity, successAmount, failAmount, amt,
)
if err != nil {
log.Errorf("error computing probability: %v", err)
log.Errorf("error computing probability to node: %v "+
"(node: %v, results: %v, amt: %v, capacity: %v)",
err, toNode, results, amt, capacity)

return 0.0
}
Expand Down Expand Up @@ -299,6 +308,32 @@ func (p *BimodalEstimator) calculateProbability(directProbability float64,
return directProbability
}

// If we have up-to-date information about the channel we want to use,
// i.e. the info stems from results that are no older than the decay time,
// we will only use the direct probability. This is needed in order to
// avoid that other previous results (on all other channels of the same
// routing node) will distort and pin the calculated probability even if
// we have accurate direct information. This helps to dip the
// probability below the min probability in case of failures, to start
// the splitting process.
directResult, ok := results[toNode]
if ok {
latest := directResult.SuccessTime
if directResult.FailTime.After(latest) {
latest = directResult.FailTime
}

// We use BimodalDecayTime to judge how up to date the
// data is. It is the time scale on which we assume to have lost
// information.
if now.Sub(latest) < p.BimodalDecayTime {
log.Tracef("Using direct probability for node %v: %v",
toNode, directResult)

return directProbability
}
}

// w is a parameter which determines how strongly the other channels of
// a node should be incorporated, the higher the stronger.
w := p.BimodalNodeWeight
Expand Down Expand Up @@ -438,10 +473,10 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
failAmount := float64(failAmountMsat)
amount := float64(amountMsat)

// Capacity being zero is a sentinel value to ignore the probability
// estimation, we'll return the full probability here.
// In order for this formula to give reasonable results, we need to have
// an estimate of the capacity of a channel (or edge between nodes).
if capacity == 0.0 {
return 1.0, nil
return 0, ErrZeroCapacity
}

// We cannot send more than the capacity.
Expand All @@ -455,11 +490,17 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,

// failAmount should be capacity at max.
if failAmount > capacity {
log.Debugf("Correcting failAmount %v to capacity %v",
failAmount, capacity)

failAmount = capacity
}

// successAmount should be capacity at max.
if successAmount > capacity {
log.Debugf("Correcting successAmount %v to capacity %v",
successAmount, capacity)

successAmount = capacity
}

Expand All @@ -468,7 +509,7 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
// happen if a large channel gets closed and smaller ones remain, but
// it should recover with the time decay.
if failAmount <= successAmount {
log.Tracef("fail amount (%v) is larger than or equal the "+
log.Tracef("fail amount (%v) is smaller than or equal the "+
"success amount (%v) for capacity (%v)",
failAmountMsat, successAmountMsat, capacityMsat)

Expand Down
70 changes: 57 additions & 13 deletions routing/probability_bimodal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,16 @@ func TestSuccessProbability(t *testing.T) {
require.NoError(t, err)
})
}

// We expect an error when the capacity is zero.
t.Run("zero capacity", func(t *testing.T) {
t.Parallel()

_, err := estimator.probabilityFormula(
0, 0, 0, 0,
)
require.ErrorIs(t, err, ErrZeroCapacity)
})
}

// TestIntegral tests certain limits of the probability distribution integral.
Expand Down Expand Up @@ -369,35 +379,44 @@ func TestComputeProbability(t *testing.T) {
nodeWeight := 1 / 5.
toNode := route.Vertex{10}
tolerance := 0.01
now := time.Unix(0, 0)
decayTime := time.Duration(1) * time.Hour * 24

// makeNodeResults prepares forwarding data for the other channels of
// the node.
makeNodeResults := func(successes []bool, now time.Time) NodeResults {
makeNodeResults := func(successes []bool, resultsTime time.Time,
directResultTime time.Time) NodeResults {

results := make(NodeResults, len(successes))

for i, s := range successes {
vertex := route.Vertex{byte(i)}

results[vertex] = TimedPairResult{
FailTime: now, FailAmt: 1,
FailTime: resultsTime, FailAmt: 1,
}
if s {
results[vertex] = TimedPairResult{
SuccessTime: now, SuccessAmt: 1,
SuccessTime: resultsTime, SuccessAmt: 1,
}
}
}

// Include a direct result.
results[toNode] = TimedPairResult{
SuccessTime: directResultTime, SuccessAmt: 1,
}

return results
}

tests := []struct {
name string
directProbability float64
recentDirectResult bool
otherResults []bool
expectedProbability float64
delay time.Duration
resultsTimeAgo time.Duration
}{
// If no other information is available, use the direct
// probability.
Expand Down Expand Up @@ -516,7 +535,7 @@ func TestComputeProbability(t *testing.T) {
"time",
directProbability: 1.0,
otherResults: []bool{true},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 1.00,
},
// A failure that was experienced some time ago won't influence
Expand All @@ -525,15 +544,15 @@ func TestComputeProbability(t *testing.T) {
name: "success, single fail, decay time",
directProbability: 1.0,
otherResults: []bool{false},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 0.9314,
},
// Information from a long time ago doesn't have any effect.
{
name: "success, single fail, long ago",
directProbability: 1.0,
otherResults: []bool{false},
delay: 10 * decayTime,
resultsTimeAgo: 10 * decayTime,
expectedProbability: 1.0,
},
{
Expand All @@ -542,7 +561,7 @@ func TestComputeProbability(t *testing.T) {
otherResults: []bool{
true, true, true, true, true,
},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 0.269,
},
// Very recent info approaches the case with no time decay.
Expand All @@ -552,9 +571,22 @@ func TestComputeProbability(t *testing.T) {
otherResults: []bool{
true, true, true, true, true,
},
delay: decayTime / 10,
resultsTimeAgo: decayTime / 10,
expectedProbability: 0.741,
},
// If we have recent info on the direct probability, we don't
// include node-wide info. Here we check that a recent failure
// is not pinned to a high probability by many successes on the
// node.
{
name: "recent direct result",
directProbability: 0.1,
recentDirectResult: true,
otherResults: []bool{
true, true, true, true, true,
},
expectedProbability: 0.1,
},
}

estimator := BimodalEstimator{
Expand All @@ -570,9 +602,19 @@ func TestComputeProbability(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
t.Parallel()

then := time.Unix(0, 0)
results := makeNodeResults(test.otherResults, then)
now := then.Add(test.delay)
var directResultTime time.Time
if test.recentDirectResult {
directResultTime = now.Add(-decayTime / 2)
} else {
directResultTime = now.Add(-2 * decayTime)
}

resultsTime := now.Add(-test.resultsTimeAgo)

results := makeNodeResults(
test.otherResults, resultsTime,
directResultTime,
)

p := estimator.calculateProbability(
test.directProbability, now, results, toNode,
Expand Down Expand Up @@ -649,7 +691,9 @@ func FuzzProbability(f *testing.F) {
estimator := BimodalEstimator{
BimodalConfig: BimodalConfig{BimodalScaleMsat: scale},
}
f.Add(uint64(0), uint64(0), uint64(0), uint64(0))

// We don't start fuzzing at zero, because that would cause an error.
f.Add(uint64(1), uint64(0), uint64(0), uint64(0))

f.Fuzz(func(t *testing.T, capacity, successAmt, failAmt, amt uint64) {
_, err := estimator.probabilityFormula(
Expand Down
2 changes: 1 addition & 1 deletion routing/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -1689,7 +1689,7 @@ func (r *ChannelRouter) processUpdate(msg interface{},
return err
}

log.Debugf("New channel update applied: %v",
log.Tracef("New channel update applied: %v",
newLogClosure(func() string { return spew.Sdump(msg) }))
r.stats.incNumChannelUpdates()

Expand Down
Loading

0 comments on commit 1052b13

Please sign in to comment.