itest: properly document the known flakes #9582

Open · wants to merge 7 commits into base: master
106 changes: 106 additions & 0 deletions itest/flakes.go
@@ -0,0 +1,106 @@
package itest

import (
	"runtime"
	"time"

	"github.com/btcsuite/btcd/btcutil"
	"github.com/lightningnetwork/lnd/lntest"
	"github.com/lightningnetwork/lnd/lntest/node"
)

// flakePreimageSettlement documents a flake found when testing the preimage
// extraction logic in a force close. The scenario is:
// - Alice and Bob have a channel.
// - Alice sends an HTLC to Bob, and Bob won't settle it.
// - Alice goes offline.
// - Bob force closes the channel and claims the HTLC using the preimage via
//   a sweeping tx.
//
// TODO(yy): Expose blockbeat to the link layer so the preimage extraction
// happens in the same block where it's spent.
func flakePreimageSettlement(ht *lntest.HarnessTest) {
	// Mine a block to trigger the sweep. This is needed because the
	// preimage extraction logic in the link is not managed by the
	// blockbeat, which means the preimage may be sent to the contest
	// resolver after the resolver has already been launched, causing Bob
	// to miss the block that should trigger it.
	ht.MineEmptyBlocks(1)

	// Sleep for 2 seconds to make sure the mempool has the correct tx.
	// If the preimage extraction finished before the block above was
	// mined, Bob would already have created the sweeping tx, and mining
	// another block would cause the sweeper to fee bump (RBF) it.
	time.Sleep(2 * time.Second)
}

// flakeFundExtraUTXO documents a flake found when testing the sweeping
// behavior of the given node, which can fail due to no wallet UTXO being
// available even though there should be enough.
//
// TODO(yy): remove it once the issue is resolved.
func flakeFundExtraUTXO(ht *lntest.HarnessTest, node *node.HarnessNode) {
	// The node should have enough wallet UTXOs here to sweep the HTLC at
	// the end of this test. However, due to a known issue, the node's
	// wallet may report there's no UTXO available. For details,
	// - https://github.com/lightningnetwork/lnd/issues/8786
	ht.FundCoins(btcutil.SatoshiPerBitcoin, node)
}

// flakeTxNotifierNeutrino documents a flake found when running force close
// tests using the neutrino backend, which is a race between two
// notifications: one for the spend of the output, the other for the block
// that contains the spending tx.
//
// TODO(yy): remove it once the issue is resolved.
func flakeTxNotifierNeutrino(ht *lntest.HarnessTest) {
	// Mine an empty block for the neutrino backend. We need this step to
	// trigger Bob's chain watcher to detect the force close tx. Deep
	// down, this happens because the notification system for neutrino is
	// very different from the others. Specifically, when a block that
	// contains the force close tx is notified, these two calls,
	// - RegisterBlockEpochNtfn, will notify the block first.
	// - RegisterSpendNtfn, will wait for the neutrino notifier to sync to
	//   the block, then perform a GetUtxo; by the time the spend details
	//   are sent, the blockbeat has already been considered processed in
	//   Bob's chain watcher.
	//
	// TODO(yy): refactor txNotifier to fix the above issue.
	if ht.IsNeutrinoBackend() {
		ht.MineEmptyBlocks(1)
	}
}

// flakeSkipPendingSweepsCheckDarwin documents a flake found only in the macOS
// build. When running on macOS, we might see three anchor sweeps - one from
// the local, one from the remote, and one from the pending remote:
// - the local one will be swept.
// - the remote one will be marked as failed due to the `testmempoolaccept`
//   check.
// - the pending remote one will not be attempted since it is uneconomical (it
//   was not used for CPFP).
//
// The anchor from the pending remote may or may not appear, which is a bug
// found only on macOS - when updating the commitments, the channel state
// machine somehow thinks we still have a pending remote commitment, causing
// it to sweep the anchor from that version.
//
// TODO(yy): fix the above bug in the channel state machine.
func flakeSkipPendingSweepsCheckDarwin(ht *lntest.HarnessTest,
	node *node.HarnessNode, num int) {

	// Skip the assertion below if it's on macOS.
	if isDarwin() {
		ht.Logf("Skipped AssertNumPendingSweeps for node %s",
			node.Name())

		return
	}

	ht.AssertNumPendingSweeps(node, num)
}

// isDarwin returns true if the test is running on macOS.
func isDarwin() bool {
	return runtime.GOOS == "darwin"
}
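
For context, here is a minimal sketch of how a force-close test might call these helpers; the test name and the surrounding setup are illustrative assumptions, not part of this diff:

// testFlakeUsageSketch is illustrative only: it shows how a force close
// test could invoke the helpers above once Bob's pending sweep has been
// asserted.
func testFlakeUsageSketch(ht *lntest.HarnessTest) {
	// ... Alice goes offline, Bob force closes and claims the HTLC with
	// the preimage, and the pending sweep has been asserted ...

	// Work around the preimage-settlement flake: mine a trigger block
	// and wait so the sweeping tx lands in the mempool without being
	// fee bumped.
	flakePreimageSettlement(ht)

	// Bob should have broadcast his sweeping tx by now.
	ht.AssertNumTxsInMempool(1)

	// On the neutrino backend, mine an extra empty block to work around
	// the txNotifier race.
	flakeTxNotifierNeutrino(ht)
}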
8 changes: 1 addition & 7 deletions itest/lnd_channel_force_close_test.go
@@ -1371,13 +1371,7 @@ func testFailingChannel(ht *lntest.HarnessTest) {
// Carol will use the correct preimage to resolve the HTLC on-chain.
ht.AssertNumPendingSweeps(carol, 1)

// Mine a block to trigger the sweep. This is needed because the
// preimage extraction logic from the link is not managed by the
// blockbeat, which means the preimage may be sent to the contest
// resolver after it's launched.
//
// TODO(yy): Expose blockbeat to the link layer.
ht.MineEmptyBlocks(1)
flakePreimageSettlement(ht)

// Carol should have broadcast her sweeping tx.
ht.AssertNumTxsInMempool(1)
28 changes: 0 additions & 28 deletions itest/lnd_funding_test.go
@@ -4,7 +4,6 @@ import (
"context"
"fmt"
"testing"
"time"

"github.com/btcsuite/btcd/btcutil"
"github.com/btcsuite/btcd/chaincfg/chainhash"
@@ -559,17 +558,6 @@ func runChannelFundingInputTypes(ht *lntest.HarnessTest, alice,
checkChannelBalance(carol, carolLocalBalance, 0, 0, 0)
checkChannelBalance(alice, 0, carolLocalBalance, 0, 0)

// TODO(yy): remove the sleep once the following bug is fixed.
//
// We may get the error `unable to gracefully close channel
// while peer is offline (try force closing it instead):
// channel link not found`. This happens because the channel
// link hasn't been added yet but we now proceed to closing the
// channel. We may need to revisit how the channel open event
// is created and make sure the event is only sent after all
// relevant states have been updated.
time.Sleep(2 * time.Second)

// Now that we're done with the test, the channel can be closed.
ht.CloseChannel(carol, chanPoint)

@@ -685,14 +673,6 @@ func runExternalFundingScriptEnforced(ht *lntest.HarnessTest) {
// HTLCs.
ht.AssertInvoiceSettled(dave, resp.PaymentAddr)

// TODO(yy): remove the sleep once the following bug is fixed.
// When the invoice is reported settled, the commitment dance is not
// yet finished, which can cause an error when closing the channel,
// saying there's active HTLCs. We need to investigate this issue and
// reverse the order to, first finish the commitment dance, then report
// the invoice as settled.
time.Sleep(2 * time.Second)

// Next we'll try but this time with Dave (the responder) as the
// initiator. This time the channel should be closed as normal.
ht.CloseChannel(dave, chanPoint2)
@@ -845,14 +825,6 @@ func runExternalFundingTaproot(ht *lntest.HarnessTest) {
// HTLCs.
ht.AssertInvoiceSettled(dave, resp.PaymentAddr)

// TODO(yy): remove the sleep once the following bug is fixed.
// When the invoice is reported settled, the commitment dance is not
// yet finished, which can cause an error when closing the channel,
// saying there's active HTLCs. We need to investigate this issue and
// reverse the order to, first finish the commitment dance, then report
// the invoice as settled.
time.Sleep(2 * time.Second)

// Next we'll try but this time with Dave (the responder) as the
// initiator. This time the channel should be closed as normal.
ht.CloseChannel(dave, chanPoint2)
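
The sleeps deleted above worked around two known issues: closing a channel before its link has been added (which yields "channel link not found") and closing before the commitment dance for a settled invoice has finished. As a purely illustrative sketch, such a wait could also be captured as a documented helper in itest/flakes.go following the pattern this PR introduces; the helper below, its name, and its TODO are hypothetical and not part of this change:

// flakeChannelLinkNotFound is a hypothetical helper (not part of this PR)
// documenting the race where a channel is closed before its link has been
// added, producing "channel link not found".
//
// TODO: remove once the channel open event is only sent after all relevant
// states have been updated.
func flakeChannelLinkNotFound(ht *lntest.HarnessTest) {
	// Give the link a moment to be added before attempting a graceful
	// close.
	time.Sleep(2 * time.Second)
}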
7 changes: 0 additions & 7 deletions itest/lnd_hold_invoice_force_test.go
@@ -89,13 +89,6 @@ func testHoldInvoiceForceClose(ht *lntest.HarnessTest) {
blocksTillCancel := blocksTillExpiry -
lncfg.DefaultHoldInvoiceExpiryDelta

// When using ht.MineBlocks, for bitcoind backend, the block height
// synced differ significantly among subsystems. From observation, the
// LNWL syncs much faster than other subsystems, with more than 10
// blocks ahead. For this test case, CRTR may be lagging behind for
// more than 20 blocks. Thus we use slow mining instead.
// TODO(yy): fix block height asymmetry among all the subsystems.
//
// We first mine enough blocks to trigger an invoice cancelation.
ht.MineBlocks(int(blocksTillCancel))

90 changes: 0 additions & 90 deletions itest/lnd_misc_test.go
@@ -24,96 +24,6 @@ import (
"github.com/stretchr/testify/require"
)

// testDisconnectingTargetPeer performs a test which disconnects Alice-peer
// from Bob-peer and then re-connects them again. We expect Alice to be able to
// disconnect at any point.
//
// TODO(yy): move to lnd_network_test.
func testDisconnectingTargetPeer(ht *lntest.HarnessTest) {
// We'll start both nodes with a high backoff so that they don't
// reconnect automatically during our test.
args := []string{
"--minbackoff=1m",
"--maxbackoff=1m",
}

alice := ht.NewNodeWithCoins("Alice", args)
bob := ht.NewNodeWithCoins("Bob", args)

// Start by connecting Alice and Bob with no channels.
ht.EnsureConnected(alice, bob)

chanAmt := funding.MaxBtcFundingAmount
pushAmt := btcutil.Amount(0)

// Create a new channel that requires 1 confs before it's considered
// open, then broadcast the funding transaction
const numConfs = 1
p := lntest.OpenChannelParams{
Amt: chanAmt,
PushAmt: pushAmt,
}
stream := ht.OpenChannelAssertStream(alice, bob, p)

// At this point, the channel's funding transaction will have been
// broadcast, but not confirmed. Alice and Bob's nodes should reflect
// this when queried via RPC.
ht.AssertNumPendingOpenChannels(alice, 1)
ht.AssertNumPendingOpenChannels(bob, 1)

// Disconnect Alice-peer from Bob-peer should have no error.
ht.DisconnectNodes(alice, bob)

// Assert that the connection was torn down.
ht.AssertNotConnected(alice, bob)

// Mine a block, then wait for Alice's node to notify us that the
// channel has been opened.
ht.MineBlocksAndAssertNumTxes(numConfs, 1)

// At this point, the channel should be fully opened and there should
// be no pending channels remaining for either node.
ht.AssertNumPendingOpenChannels(alice, 0)
ht.AssertNumPendingOpenChannels(bob, 0)

// Reconnect the nodes so that the channel can become active.
ht.ConnectNodes(alice, bob)

// The channel should be listed in the peer information returned by
// both peers.
chanPoint := ht.WaitForChannelOpenEvent(stream)

// Check both nodes to ensure that the channel is ready for operation.
ht.AssertChannelExists(alice, chanPoint)
ht.AssertChannelExists(bob, chanPoint)

// Disconnect Alice-peer from Bob-peer should have no error.
ht.DisconnectNodes(alice, bob)

// Check existing connection.
ht.AssertNotConnected(alice, bob)

// Reconnect both nodes before force closing the channel.
ht.ConnectNodes(alice, bob)

// Finally, immediately close the channel. This function will also
// block until the channel is closed and will additionally assert the
// relevant channel closing post conditions.
ht.ForceCloseChannel(alice, chanPoint)

// Disconnect Alice-peer from Bob-peer should have no error.
ht.DisconnectNodes(alice, bob)

// Check that the nodes not connected.
ht.AssertNotConnected(alice, bob)

// Finally, re-connect both nodes.
ht.ConnectNodes(alice, bob)

// Check existing connection.
ht.AssertConnected(alice, bob)
}

// testSphinxReplayPersistence verifies that replayed onion packets are
// rejected by a remote peer after a restart. We use a combination of unsafe
// configuration arguments to force Carol to replay the same sphinx packet