@@ -27,6 +27,7 @@ import (
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/metrics"
 )
 
 // ErrNotRequested is returned by the trie sync when it's requested to process a
@@ -42,6 +43,16 @@ var ErrAlreadyProcessed = errors.New("already processed")
 // memory if the node was configured with a significant number of peers.
 const maxFetchesPerDepth = 16384
 
+var (
+	// deletionGauge is the metric to track how many trie node deletions
+	// are performed in total during the sync process.
+	deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil)
+
+	// lookupGauge is the metric to track how many trie node lookups are
+	// performed to determine if a node needs to be deleted.
+	lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil)
+)
+
 // SyncPath is a path tuple identifying a particular trie node either in a single
 // trie (account) or a layered trie (account -> storage).
 //
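
As an aside: gauges from the go-ethereum metrics package are registered once against the default registry and become cheap no-ops when metrics collection is disabled. A minimal sketch of the same pattern, using a hypothetical metric name, might look like:

package main

import (
	"github.com/ethereum/go-ethereum/metrics"
)

// exampleGauge is a hypothetical gauge; with metrics collection
// disabled, NewRegisteredGauge hands back a no-op implementation.
var exampleGauge = metrics.NewRegisteredGauge("trie/sync/example", nil)

func main() {
	exampleGauge.Inc(3)     // bump the gauge by three observed events
	exampleGauge.Dec(1)     // gauges can move in both directions
	exampleGauge.Update(42) // or be set to an absolute value
}
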
@@ -93,9 +104,10 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha
 
 // nodeRequest represents a scheduled or already in-flight trie node retrieval request.
 type nodeRequest struct {
-	hash common.Hash // Hash of the trie node to retrieve
-	path []byte      // Merkle path leading to this node for prioritization
-	data []byte      // Data content of the node, cached until all subtrees complete
+	hash    common.Hash // Hash of the trie node to retrieve
+	path    []byte      // Merkle path leading to this node for prioritization
+	data    []byte      // Data content of the node, cached until all subtrees complete
+	deletes [][]byte    // List of internal path segments for trie nodes to delete
 
 	parent   *nodeRequest // Parent state node referencing this entry
 	deps     int          // Number of dependencies before allowed to commit this node
@@ -125,18 +137,20 @@ type CodeSyncResult struct {
 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
 // persisted data items.
 type syncMemBatch struct {
-	nodes  map[string][]byte      // In-memory membatch of recently completed nodes
-	hashes map[string]common.Hash // Hashes of recently completed nodes
-	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
-	size   uint64                 // Estimated batch-size of in-memory data.
+	nodes   map[string][]byte      // In-memory membatch of recently completed nodes
+	hashes  map[string]common.Hash // Hashes of recently completed nodes
+	deletes map[string]struct{}    // Set of paths for trie nodes to delete
+	codes   map[common.Hash][]byte // In-memory membatch of recently completed codes
+	size    uint64                 // Estimated batch-size of in-memory data.
 }
 
 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
 func newSyncMemBatch() *syncMemBatch {
 	return &syncMemBatch{
-		nodes:  make(map[string][]byte),
-		hashes: make(map[string]common.Hash),
-		codes:  make(map[common.Hash][]byte),
+		nodes:   make(map[string][]byte),
+		hashes:  make(map[string]common.Hash),
+		deletes: make(map[string]struct{}),
+		codes:   make(map[common.Hash][]byte),
 	}
 }
 
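
The new deletes field uses the standard Go set idiom: a map keyed by the stringified path bytes with zero-sized struct{} values, so membership tracking adds no per-entry payload. A small, self-contained illustration (names are illustrative, not from this change):

package main

import "fmt"

func main() {
	// A set of trie paths, keyed by the raw path bytes converted to string.
	deletes := make(map[string]struct{})

	path := []byte{0x01, 0x02, 0x03}
	deletes[string(path)] = struct{}{} // schedule the path for deletion

	// The comma-ok form distinguishes "present" from "absent".
	if _, ok := deletes[string(path)]; ok {
		fmt.Printf("path %x scheduled for deletion\n", path)
	}
}
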
@@ -347,16 +361,23 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error {
 // Commit flushes the data stored in the internal membatch out to persistent
 // storage, returning any occurred error.
 func (s *Sync) Commit(dbw ethdb.Batch) error {
-	// Dump the membatch into a database dbw
+	// Flush the pending node writes into the database batch.
 	for path, value := range s.membatch.nodes {
 		owner, inner := ResolvePath([]byte(path))
 		rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
 	}
+	// Flush the pending node deletes into the database batch.
+	// Note that each written and deleted node has a unique
+	// path, ensuring no duplication occurs.
+	for path := range s.membatch.deletes {
+		owner, inner := ResolvePath([]byte(path))
+		rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
+	}
+	// Flush the pending code writes into the database batch.
 	for hash, value := range s.membatch.codes {
 		rawdb.WriteCode(dbw, hash, value)
 	}
-	// Drop the membatch data and return
-	s.membatch = newSyncMemBatch()
+	s.membatch = newSyncMemBatch() // reset the batch
 	return nil
 }
 
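
For context on how Commit is driven: it only stages the buffered writes and deletes into the caller-supplied batch, so the caller still has to flush the batch to disk. A hedged sketch of the typical call pattern (the flushSync helper is illustrative, not part of this change):

package example

import (
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/trie"
)

// flushSync drains the sync membatch into the backing key-value store.
// Commit only stages mutations into the batch; Write persists them.
func flushSync(db ethdb.Database, sched *trie.Sync) error {
	batch := db.NewBatch()
	if err := sched.Commit(batch); err != nil {
		return err
	}
	return batch.Write()
}
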
@@ -425,6 +446,39 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
 			node: node.Val,
 			path: append(append([]byte(nil), req.path...), key...),
 		}}
+		// Mark all internal nodes between the shortNode and its **on-disk**
+		// child as invalid. This is essential in path mode; otherwise,
+		// state healing might silently overwrite existing child nodes
+		// while leaving a dangling parent node within the range of this
+		// internal path on disk, which would break the guarantee of
+		// state healing.
+		//
+		// While it's possible for this shortNode to overwrite a previously
+		// existing fullNode, the other branches of that fullNode can be
+		// retained, as they remain untouched and complete.
+		//
+		// This step is only necessary in path mode, as there are no
+		// deletions in hash mode at all.
+		if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme {
+			owner, inner := ResolvePath(req.path)
+			for i := 1; i < len(key); i++ {
+				// While checking for a non-existent item in Pebble can be less
+				// efficient without a bloom filter, the relatively low frequency
+				// of lookups makes the performance impact negligible.
+				var exists bool
+				if owner == (common.Hash{}) {
+					exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))
+				} else {
+					exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...))
+				}
+				if exists {
+					req.deletes = append(req.deletes, key[:i])
+					deletionGauge.Inc(1)
+					log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...))
+				}
+			}
+			lookupGauge.Inc(int64(len(key) - 1))
+		}
 	case *fullNode:
 		for i := 0; i < 17; i++ {
 			if node.Children[i] != nil {
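
To make the loop above concrete: for a shortNode at path p with key k, the interior paths p+k[:1] through p+k[:len(k)-1] lie strictly between the shortNode and its child, and any stale node found at one of them must be scheduled for deletion. A standalone sketch of the same prefix enumeration (values are illustrative):

package main

import "fmt"

func main() {
	reqPath := []byte{0x0a}               // path of the shortNode being synced
	key := []byte{0x01, 0x02, 0x03, 0x04} // the shortNode's key nibbles

	// Every strict, non-empty prefix of the key names an internal path that
	// may still hold a stale node left over from a different trie shape.
	for i := 1; i < len(key); i++ {
		inner := append(append([]byte(nil), reqPath...), key[:i]...)
		fmt.Printf("lookup (and maybe delete) path %x\n", inner)
	}
	// len(key)-1 lookups in total, matching lookupGauge.Inc(int64(len(key)-1)).
}
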
@@ -509,10 +563,19 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error {
 	// Write the node content to the membatch
 	s.membatch.nodes[string(req.path)] = req.data
 	s.membatch.hashes[string(req.path)] = req.hash
+
 	// The size tracking refers to the db-batch, not the in-memory data.
-	// Therefore, we ignore the req.path, and account only for the hash+data
-	// which eventually is written to db.
-	s.membatch.size += common.HashLength + uint64(len(req.data))
+	if s.scheme == rawdb.PathScheme {
+		s.membatch.size += uint64(len(req.path) + len(req.data))
+	} else {
+		s.membatch.size += common.HashLength + uint64(len(req.data))
+	}
+	// Delete the internal nodes which are marked as invalid
+	for _, segment := range req.deletes {
+		path := append(req.path, segment...)
+		s.membatch.deletes[string(path)] = struct{}{}
+		s.membatch.size += uint64(len(path))
+	}
 	delete(s.nodeReqs, string(req.path))
 	s.fetches[len(req.path)]--
 
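
The two size estimates mirror the database key shapes: in hash mode a node is keyed by its 32-byte hash, while in path mode it is keyed by its path, so the path length is what actually lands in the batch. A rough worked example of the difference (numbers are illustrative):

package main

import "fmt"

const hashLength = 32 // size of a common.Hash in bytes

func main() {
	path := []byte{0x01, 0x02, 0x03} // 3-byte node path
	data := make([]byte, 100)        // 100-byte RLP-encoded node

	hashModeSize := uint64(hashLength) + uint64(len(data)) // key is the node hash
	pathModeSize := uint64(len(path) + len(data))          // key is the node path

	fmt.Println("hash scheme estimate:", hashModeSize) // 132
	fmt.Println("path scheme estimate:", pathModeSize) // 103
}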