Skip to content

Commit fa0df76

Browse files
trie/triedb/hashdb: take lock around access to dirties cache (#28542)
Add read locking of db lock around access to dirties cache in hashdb.Database to prevent data race versus hashdb.Database.dereference which can modify the dirities map by deleting an item. Fixes #28541 --------- Co-authored-by: Gary Rong <[email protected]>
1 parent ab0eb46 commit fa0df76

File tree

2 files changed

+23
-57
lines changed

2 files changed

+23
-57
lines changed

trie/database.go

-11
Original file line numberDiff line numberDiff line change
@@ -240,17 +240,6 @@ func (db *Database) Dereference(root common.Hash) error {
240240
return nil
241241
}
242242

243-
// Node retrieves the rlp-encoded node blob with provided node hash. It's
244-
// only supported by hash-based database and will return an error for others.
245-
// Note, this function should be deprecated once ETH66 is deprecated.
246-
func (db *Database) Node(hash common.Hash) ([]byte, error) {
247-
hdb, ok := db.backend.(*hashdb.Database)
248-
if !ok {
249-
return nil, errors.New("not supported")
250-
}
251-
return hdb.Node(hash)
252-
}
253-
254243
// Recover rollbacks the database to a specified historical point. The state is
255244
// supported as the rollback destination only if it's canonical state and the
256245
// corresponding trie histories are existent. It's only supported by path-based

trie/triedb/hashdb/database.go

+23-46
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ var Defaults = &Config{
8282
// Database is an intermediate write layer between the trie data structures and
8383
// the disk database. The aim is to accumulate trie writes in-memory and only
8484
// periodically flush a couple tries to disk, garbage collecting the remainder.
85-
//
86-
// Note, the trie Database is **not** thread safe in its mutations, but it **is**
87-
// thread safe in providing individual, independent node access. The rationale
88-
// behind this split design is to provide read access to RPC handlers and sync
89-
// servers even while the trie is executing expensive garbage collection.
9085
type Database struct {
9186
diskdb ethdb.Database // Persistent storage for matured trie nodes
9287
resolver ChildResolver // The handler to resolve children of nodes
@@ -113,7 +108,7 @@ type Database struct {
113108
// cachedNode is all the information we know about a single cached trie node
114109
// in the memory database write layer.
115110
type cachedNode struct {
116-
node []byte // Encoded node blob
111+
node []byte // Encoded node blob, immutable
117112
parents uint32 // Number of live nodes referencing this one
118113
external map[common.Hash]struct{} // The set of external children
119114
flushPrev common.Hash // Previous node in the flush-list
@@ -152,9 +147,9 @@ func New(diskdb ethdb.Database, config *Config, resolver ChildResolver) *Databas
152147
}
153148
}
154149

155-
// insert inserts a simplified trie node into the memory database.
156-
// All nodes inserted by this function will be reference tracked
157-
// and in theory should only used for **trie nodes** insertion.
150+
// insert inserts a trie node into the memory database. All nodes inserted by
151+
// this function will be reference tracked. This function assumes the lock is
152+
// already held.
158153
func (db *Database) insert(hash common.Hash, node []byte) {
159154
// If the node's already cached, skip
160155
if _, ok := db.dirties[hash]; ok {
@@ -183,9 +178,9 @@ func (db *Database) insert(hash common.Hash, node []byte) {
183178
db.dirtiesSize += common.StorageSize(common.HashLength + len(node))
184179
}
185180

186-
// Node retrieves an encoded cached trie node from memory. If it cannot be found
181+
// node retrieves an encoded cached trie node from memory. If it cannot be found
187182
// cached, the method queries the persistent database for the content.
188-
func (db *Database) Node(hash common.Hash) ([]byte, error) {
183+
func (db *Database) node(hash common.Hash) ([]byte, error) {
189184
// It doesn't make sense to retrieve the metaroot
190185
if hash == (common.Hash{}) {
191186
return nil, errors.New("not found")
@@ -198,11 +193,14 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
198193
return enc, nil
199194
}
200195
}
201-
// Retrieve the node from the dirty cache if available
196+
// Retrieve the node from the dirty cache if available.
202197
db.lock.RLock()
203198
dirty := db.dirties[hash]
204199
db.lock.RUnlock()
205200

201+
// Return the cached node if it's found in the dirty set.
202+
// The dirty.node field is immutable and safe to read it
203+
// even without lock guard.
206204
if dirty != nil {
207205
memcacheDirtyHitMeter.Mark(1)
208206
memcacheDirtyReadMeter.Mark(int64(len(dirty.node)))
@@ -223,20 +221,6 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
223221
return nil, errors.New("not found")
224222
}
225223

226-
// Nodes retrieves the hashes of all the nodes cached within the memory database.
227-
// This method is extremely expensive and should only be used to validate internal
228-
// states in test code.
229-
func (db *Database) Nodes() []common.Hash {
230-
db.lock.RLock()
231-
defer db.lock.RUnlock()
232-
233-
var hashes = make([]common.Hash, 0, len(db.dirties))
234-
for hash := range db.dirties {
235-
hashes = append(hashes, hash)
236-
}
237-
return hashes
238-
}
239-
240224
// Reference adds a new reference from a parent node to a child node.
241225
// This function is used to add reference between internal trie node
242226
// and external node(e.g. storage trie root), all internal trie nodes
@@ -344,16 +328,16 @@ func (db *Database) dereference(hash common.Hash) {
344328

345329
// Cap iteratively flushes old but still referenced trie nodes until the total
346330
// memory usage goes below the given threshold.
347-
//
348-
// Note, this method is a non-synchronized mutator. It is unsafe to call this
349-
// concurrently with other mutators.
350331
func (db *Database) Cap(limit common.StorageSize) error {
332+
db.lock.Lock()
333+
defer db.lock.Unlock()
334+
351335
// Create a database batch to flush persistent data out. It is important that
352336
// outside code doesn't see an inconsistent state (referenced data removed from
353337
// memory cache during commit but not yet in persistent storage). This is ensured
354338
// by only uncaching existing data when the database write finalizes.
355-
nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
356339
batch := db.diskdb.NewBatch()
340+
nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
357341

358342
// db.dirtiesSize only contains the useful data in the cache, but when reporting
359343
// the total memory consumption, the maintenance metadata is also needed to be
@@ -391,9 +375,6 @@ func (db *Database) Cap(limit common.StorageSize) error {
391375
return err
392376
}
393377
// Write successful, clear out the flushed data
394-
db.lock.Lock()
395-
defer db.lock.Unlock()
396-
397378
for db.oldest != oldest {
398379
node := db.dirties[db.oldest]
399380
delete(db.dirties, db.oldest)
@@ -424,10 +405,10 @@ func (db *Database) Cap(limit common.StorageSize) error {
424405
// Commit iterates over all the children of a particular node, writes them out
425406
// to disk, forcefully tearing down all references in both directions. As a side
426407
// effect, all pre-images accumulated up to this point are also written.
427-
//
428-
// Note, this method is a non-synchronized mutator. It is unsafe to call this
429-
// concurrently with other mutators.
430408
func (db *Database) Commit(node common.Hash, report bool) error {
409+
db.lock.Lock()
410+
defer db.lock.Unlock()
411+
431412
// Create a database batch to flush persistent data out. It is important that
432413
// outside code doesn't see an inconsistent state (referenced data removed from
433414
// memory cache during commit but not yet in persistent storage). This is ensured
@@ -449,8 +430,6 @@ func (db *Database) Commit(node common.Hash, report bool) error {
449430
return err
450431
}
451432
// Uncache any leftovers in the last batch
452-
db.lock.Lock()
453-
defer db.lock.Unlock()
454433
if err := batch.Replay(uncacher); err != nil {
455434
return err
456435
}
@@ -499,13 +478,11 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
499478
if err := batch.Write(); err != nil {
500479
return err
501480
}
502-
db.lock.Lock()
503481
err := batch.Replay(uncacher)
504-
batch.Reset()
505-
db.lock.Unlock()
506482
if err != nil {
507483
return err
508484
}
485+
batch.Reset()
509486
}
510487
return nil
511488
}
@@ -574,7 +551,7 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool {
574551
func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error {
575552
// Ensure the parent state is present and signal a warning if not.
576553
if parent != types.EmptyRootHash {
577-
if blob, _ := db.Node(parent); len(blob) == 0 {
554+
if blob, _ := db.node(parent); len(blob) == 0 {
578555
log.Error("parent state is not present")
579556
}
580557
}
@@ -655,7 +632,7 @@ func (db *Database) Scheme() string {
655632
// Reader retrieves a node reader belonging to the given state root.
656633
// An error will be returned if the requested state is not available.
657634
func (db *Database) Reader(root common.Hash) (*reader, error) {
658-
if _, err := db.Node(root); err != nil {
635+
if _, err := db.node(root); err != nil {
659636
return nil, fmt.Errorf("state %#x is not available, %v", root, err)
660637
}
661638
return &reader{db: db}, nil
@@ -666,9 +643,9 @@ type reader struct {
666643
db *Database
667644
}
668645

669-
// Node retrieves the trie node with the given node hash.
670-
// No error will be returned if the node is not found.
646+
// Node retrieves the trie node with the given node hash. No error will be
647+
// returned if the node is not found.
671648
func (reader *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
672-
blob, _ := reader.db.Node(hash)
649+
blob, _ := reader.db.node(hash)
673650
return blob, nil
674651
}

0 commit comments

Comments
 (0)