Skip to content

Commit

Permalink
Merge pull request #7520 from bitromortac/2303-bimodal-fixes
Browse files Browse the repository at this point in the history
routing: prevent retrials for failed channels in bimodal pathfinding
  • Loading branch information
guggero authored Mar 23, 2023
2 parents fe24f19 + a3e46da commit 1052b13
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 35 deletions.
3 changes: 2 additions & 1 deletion cmd/lncli/cmd_mission_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ var setCfgCommand = cli.Command{
cli.StringFlag{
Name: "estimator",
Usage: "the probability estimator to use, choose " +
"between 'apriori' or 'bimodal'",
"between 'apriori' or 'bimodal' (bimodal is " +
"experimental)",
},
// Apriori config.
cli.DurationFlag{
Expand Down
4 changes: 4 additions & 0 deletions docs/release-notes/release-notes-0.16.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,10 @@ in the lnwire package](https://github.com/lightningnetwork/lnd/pull/7303)
https://github.com/lightningnetwork/lnd/pull/6815)
* [The a priori capacity factor is made configurable and its effect is
limited.](https://github.com/lightningnetwork/lnd/pull/7444)
* Local edges and hop hints [are extended with a
capacity](https://github.com/lightningnetwork/lnd/pull/7520) to avoid channel
whitelisting in probability calculations. The influence of the node
probability is reduced.

## Configuration
* Note that [this pathfinding change](https://github.com/lightningnetwork/lnd/pull/6815)
Expand Down
10 changes: 5 additions & 5 deletions lnrpc/routerrpc/router.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions lnrpc/routerrpc/router.proto
Original file line number Diff line number Diff line change
Expand Up @@ -513,9 +513,9 @@ message MissionControlConfig {

/*
ProbabilityModel defines which probability estimator should be used in
pathfinding.
pathfinding. Note that the bimodal estimator is experimental.
*/
ProbabilityModel Model = 6;
ProbabilityModel model = 6;

/*
EstimatorConfig is populated dependent on the estimator type.
Expand Down
4 changes: 2 additions & 2 deletions lnrpc/routerrpc/router.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1475,9 +1475,9 @@
"format": "uint64",
"description": "The minimum time that must have passed since the previously recorded failure\nbefore we raise the failure amount."
},
"Model": {
"model": {
"$ref": "#/definitions/MissionControlConfigProbabilityModel",
"description": "ProbabilityModel defines which probability estimator should be used in\npathfinding."
"description": "ProbabilityModel defines which probability estimator should be used in\npathfinding. Note that the bimodal estimator is experimental."
},
"apriori": {
"$ref": "#/definitions/routerrpcAprioriParameters"
Expand Down
16 changes: 15 additions & 1 deletion routing/pathfind.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ const (
// to avoid resizing and copies. It should be a number on the same order as
// the number of active nodes in the network.
estimatedNodeCount = 10000

// fakeHopHintCapacity is the capacity we assume for hop hint channels.
// This is a high number, which expresses that a hop hint channel should
// be able to route payments.
fakeHopHintCapacity = btcutil.Amount(10 * btcutil.SatoshiPerBitcoin)
)

// pathFinder defines the interface of a path finding algorithm.
Expand Down Expand Up @@ -863,8 +868,17 @@ func findPath(g *graphParams, r *RestrictParams, cfg *PathFindingConfig,
return nil, 0, err
}

// We add hop hints that were supplied externally.
for _, reverseEdge := range additionalEdgesWithSrc[pivot] {
u.addPolicy(reverseEdge.sourceNode, reverseEdge.edge, 0)
// Hop hints don't contain a capacity. We set one here,
// since a capacity is needed for probability
// calculations. We set a high capacity to act as if
// there is enough liquidity, otherwise the hint would
// not have been added by a wallet.
u.addPolicy(
reverseEdge.sourceNode, reverseEdge.edge,
fakeHopHintCapacity,
)
}

amtToSend := partialPath.amountToReceive
Expand Down
57 changes: 49 additions & 8 deletions routing/probability_bimodal.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ const (
// the timescale of about a week.
DefaultBimodalDecayTime = 7 * 24 * time.Hour

// BimodalScaleMsatLimit is the maximum value for BimodalScaleMsat to
// avoid numerical issues.
BimodalScaleMsatMax = lnwire.MilliSatoshi(21e17)
// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat. We
// limit it here to the fakeHopHintCapacity to avoid issues with hop
// hint probability calculations.
BimodalScaleMsatMax = lnwire.MilliSatoshi(
1000 * fakeHopHintCapacity / 4,
)

// BimodalEstimatorName is used to identify the bimodal estimator.
BimodalEstimatorName = "bimodal"
Expand All @@ -46,6 +49,10 @@ var (

// ErrInvalidDecayTime is returned when we get a decay time below zero.
ErrInvalidDecayTime = errors.New("decay time must be larger than zero")

// ErrZeroCapacity is returned when we encounter a channel with zero
// capacity in probability estimation.
ErrZeroCapacity = errors.New("capacity must be larger than zero")
)

// BimodalConfig contains configuration for our probability estimator.
Expand Down Expand Up @@ -223,7 +230,9 @@ func (p *BimodalEstimator) directProbability(now time.Time,
capacity, successAmount, failAmount, amt,
)
if err != nil {
log.Errorf("error computing probability: %v", err)
log.Errorf("error computing probability to node: %v "+
"(node: %v, results: %v, amt: %v, capacity: %v)",
err, toNode, results, amt, capacity)

return 0.0
}
Expand Down Expand Up @@ -299,6 +308,32 @@ func (p *BimodalEstimator) calculateProbability(directProbability float64,
return directProbability
}

// If we have up-to-date information about the channel we want to use,
// i.e. the info stems from results that are no older than the decay time,
// we will only use the direct probability. This is needed in order to
// avoid that other previous results (on all other channels of the same
// routing node) will distort and pin the calculated probability even if
// we have accurate direct information. This helps to dip the
// probability below the min probability in case of failures, to start
// the splitting process.
directResult, ok := results[toNode]
if ok {
latest := directResult.SuccessTime
if directResult.FailTime.After(latest) {
latest = directResult.FailTime
}

// We use BimodalDecayTime to judge how up to date the
// data is. It is the time scale on which we assume to have lost
// information.
if now.Sub(latest) < p.BimodalDecayTime {
log.Tracef("Using direct probability for node %v: %v",
toNode, directResult)

return directProbability
}
}

// w is a parameter which determines how strongly the other channels of
// a node should be incorporated, the higher the stronger.
w := p.BimodalNodeWeight
Expand Down Expand Up @@ -438,10 +473,10 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
failAmount := float64(failAmountMsat)
amount := float64(amountMsat)

// Capacity being zero is a sentinel value to ignore the probability
// estimation, we'll return the full probability here.
// In order for this formula to give reasonable results, we need to have
// an estimate of the capacity of a channel (or edge between nodes).
if capacity == 0.0 {
return 1.0, nil
return 0, ErrZeroCapacity
}

// We cannot send more than the capacity.
Expand All @@ -455,11 +490,17 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,

// failAmount should be capacity at max.
if failAmount > capacity {
log.Debugf("Correcting failAmount %v to capacity %v",
failAmount, capacity)

failAmount = capacity
}

// successAmount should be capacity at max.
if successAmount > capacity {
log.Debugf("Correcting successAmount %v to capacity %v",
successAmount, capacity)

successAmount = capacity
}

Expand All @@ -468,7 +509,7 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
// happen if a large channel gets closed and smaller ones remain, but
// it should recover with the time decay.
if failAmount <= successAmount {
log.Tracef("fail amount (%v) is larger than or equal the "+
log.Tracef("fail amount (%v) is smaller than or equal the "+
"success amount (%v) for capacity (%v)",
failAmountMsat, successAmountMsat, capacityMsat)

Expand Down
70 changes: 57 additions & 13 deletions routing/probability_bimodal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,16 @@ func TestSuccessProbability(t *testing.T) {
require.NoError(t, err)
})
}

// We expect an error when the capacity is zero.
t.Run("zero capacity", func(t *testing.T) {
t.Parallel()

_, err := estimator.probabilityFormula(
0, 0, 0, 0,
)
require.ErrorIs(t, err, ErrZeroCapacity)
})
}

// TestIntegral tests certain limits of the probability distribution integral.
Expand Down Expand Up @@ -369,35 +379,44 @@ func TestComputeProbability(t *testing.T) {
nodeWeight := 1 / 5.
toNode := route.Vertex{10}
tolerance := 0.01
now := time.Unix(0, 0)
decayTime := time.Duration(1) * time.Hour * 24

// makeNodeResults prepares forwarding data for the other channels of
// the node.
makeNodeResults := func(successes []bool, now time.Time) NodeResults {
makeNodeResults := func(successes []bool, resultsTime time.Time,
directResultTime time.Time) NodeResults {

results := make(NodeResults, len(successes))

for i, s := range successes {
vertex := route.Vertex{byte(i)}

results[vertex] = TimedPairResult{
FailTime: now, FailAmt: 1,
FailTime: resultsTime, FailAmt: 1,
}
if s {
results[vertex] = TimedPairResult{
SuccessTime: now, SuccessAmt: 1,
SuccessTime: resultsTime, SuccessAmt: 1,
}
}
}

// Include a direct result.
results[toNode] = TimedPairResult{
SuccessTime: directResultTime, SuccessAmt: 1,
}

return results
}

tests := []struct {
name string
directProbability float64
recentDirectResult bool
otherResults []bool
expectedProbability float64
delay time.Duration
resultsTimeAgo time.Duration
}{
// If no other information is available, use the direct
// probability.
Expand Down Expand Up @@ -516,7 +535,7 @@ func TestComputeProbability(t *testing.T) {
"time",
directProbability: 1.0,
otherResults: []bool{true},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 1.00,
},
// A failure that was experienced some time ago won't influence
Expand All @@ -525,15 +544,15 @@ func TestComputeProbability(t *testing.T) {
name: "success, single fail, decay time",
directProbability: 1.0,
otherResults: []bool{false},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 0.9314,
},
// Information from a long time ago doesn't have any effect.
{
name: "success, single fail, long ago",
directProbability: 1.0,
otherResults: []bool{false},
delay: 10 * decayTime,
resultsTimeAgo: 10 * decayTime,
expectedProbability: 1.0,
},
{
Expand All @@ -542,7 +561,7 @@ func TestComputeProbability(t *testing.T) {
otherResults: []bool{
true, true, true, true, true,
},
delay: decayTime,
resultsTimeAgo: decayTime,
expectedProbability: 0.269,
},
// Very recent info approaches the case with no time decay.
Expand All @@ -552,9 +571,22 @@ func TestComputeProbability(t *testing.T) {
otherResults: []bool{
true, true, true, true, true,
},
delay: decayTime / 10,
resultsTimeAgo: decayTime / 10,
expectedProbability: 0.741,
},
// If we have recent info on the direct probability, we don't
// include node-wide info. Here we check that a recent failure
// is not pinned to a high probability by many successes on the
// node.
{
name: "recent direct result",
directProbability: 0.1,
recentDirectResult: true,
otherResults: []bool{
true, true, true, true, true,
},
expectedProbability: 0.1,
},
}

estimator := BimodalEstimator{
Expand All @@ -570,9 +602,19 @@ func TestComputeProbability(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
t.Parallel()

then := time.Unix(0, 0)
results := makeNodeResults(test.otherResults, then)
now := then.Add(test.delay)
var directResultTime time.Time
if test.recentDirectResult {
directResultTime = now.Add(-decayTime / 2)
} else {
directResultTime = now.Add(-2 * decayTime)
}

resultsTime := now.Add(-test.resultsTimeAgo)

results := makeNodeResults(
test.otherResults, resultsTime,
directResultTime,
)

p := estimator.calculateProbability(
test.directProbability, now, results, toNode,
Expand Down Expand Up @@ -649,7 +691,9 @@ func FuzzProbability(f *testing.F) {
estimator := BimodalEstimator{
BimodalConfig: BimodalConfig{BimodalScaleMsat: scale},
}
f.Add(uint64(0), uint64(0), uint64(0), uint64(0))

// We don't start fuzzing at zero, because that would cause an error.
f.Add(uint64(1), uint64(0), uint64(0), uint64(0))

f.Fuzz(func(t *testing.T, capacity, successAmt, failAmt, amt uint64) {
_, err := estimator.probabilityFormula(
Expand Down
2 changes: 1 addition & 1 deletion routing/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -1689,7 +1689,7 @@ func (r *ChannelRouter) processUpdate(msg interface{},
return err
}

log.Debugf("New channel update applied: %v",
log.Tracef("New channel update applied: %v",
newLogClosure(func() string { return spew.Sdump(msg) }))
r.stats.incNumChannelUpdates()

Expand Down
Loading

0 comments on commit 1052b13

Please sign in to comment.