Skip to content

Commit

Permalink
core/rawdb: introduce flush offset in freezer (#30392)
Browse files Browse the repository at this point in the history
This is a follow-up PR to #29792 to get rid of the data file sync.

**This is a non-backward compatible change, which increments the
database version from 8 to 9**.

We introduce a flushOffset for each freezer table, which tracks the position
of the most recently fsync’d item in the index file. When this offset moves
forward, it indicates that all index entries below it, along with their corresponding
data items, have been properly persisted to disk. The offset can also be moved
backward when truncating from either the head or tail of the file.

Previously, the data file required an explicit fsync after every mutation, which
was highly inefficient. With the introduction of the flush offset, the synchronization
strategy becomes more flexible, allowing the freezer to sync every 30 seconds
instead.

The data items above the flush offset are regarded volatile and callers must ensure
they are recoverable after the unclean shutdown, or explicitly sync the freezer
before any proceeding operations.

---------

Co-authored-by: Felix Lange <[email protected]>
  • Loading branch information
rjl493456442 and fjl authored Feb 4, 2025
1 parent e26dd77 commit 0ad0966
Show file tree
Hide file tree
Showing 8 changed files with 654 additions and 250 deletions.
10 changes: 8 additions & 2 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,29 @@ const (
// * the `BlockNumber`, `TxHash`, `TxIndex`, `BlockHash` and `Index` fields of log are deleted
// * the `Bloom` field of receipt is deleted
// * the `BlockIndex` and `TxIndex` fields of txlookup are deleted
//
// - Version 5
// The following incompatible database changes were added:
// * the `TxHash`, `GasCost`, and `ContractAddress` fields are no longer stored for a receipt
// * the `TxHash`, `GasCost`, and `ContractAddress` fields are computed by looking up the
// receipts' corresponding block
//
// - Version 6
// The following incompatible database changes were added:
// * Transaction lookup information stores the corresponding block number instead of block hash
//
// - Version 7
// The following incompatible database changes were added:
// * Use freezer as the ancient database to maintain all ancient data
//
// - Version 8
// The following incompatible database changes were added:
// * New scheme for contract code in order to separate the codes and trie nodes
//
// - Version 9
// Total difficulty has been removed from both the key-value store and the
// ancient store, the td freezer table has been deprecated since that.
// The following incompatible database changes were added:
// * Total difficulty has been removed from both the key-value store and the ancient store.
// * The metadata structure of freezer is changed by adding 'flushOffset'
BlockChainVersion uint64 = 9
)

Expand Down
3 changes: 2 additions & 1 deletion core/rawdb/accessors_chain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,7 @@ func TestHeadersRLPStorage(t *testing.T) {
t.Fatalf("failed to create database with ancient backend")
}
defer db.Close()

// Create blocks
var chain []*types.Block
var pHash common.Hash
Expand All @@ -864,7 +865,7 @@ func TestHeadersRLPStorage(t *testing.T) {
chain = append(chain, block)
pHash = block.Hash()
}
var receipts []types.Receipts = make([]types.Receipts, 100)
receipts := make([]types.Receipts, 100)
// Write first half to ancients
WriteAncientBlocks(db, chain[:50], receipts[:50])
// Write second half to db
Expand Down
1 change: 1 addition & 0 deletions core/rawdb/ancient_scheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ const (
stateHistoryStorageData = "storage.data"
)

// stateFreezerNoSnappy configures whether compression is disabled for the state freezer.
var stateFreezerNoSnappy = map[string]bool{
stateHistoryMeta: true,
stateHistoryAccountIndex: false,
Expand Down
10 changes: 7 additions & 3 deletions core/rawdb/freezer_batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package rawdb
import (
"fmt"
"math"
"time"

"github.com/ethereum/go-ethereum/rlp"
"github.com/golang/snappy"
Expand Down Expand Up @@ -188,9 +189,6 @@ func (batch *freezerTableBatch) commit() error {
if err != nil {
return err
}
if err := batch.t.head.Sync(); err != nil {
return err
}
dataSize := int64(len(batch.dataBuffer))
batch.dataBuffer = batch.dataBuffer[:0]

Expand All @@ -208,6 +206,12 @@ func (batch *freezerTableBatch) commit() error {
// Update metrics.
batch.t.sizeGauge.Inc(dataSize + indexSize)
batch.t.writeMeter.Mark(dataSize + indexSize)

// Periodically sync the table, todo (rjl493456442) make it configurable?
if time.Since(batch.t.lastSync) > 30*time.Second {
batch.t.lastSync = time.Now()
return batch.t.Sync()
}
return nil
}

Expand Down
186 changes: 133 additions & 53 deletions core/rawdb/freezer_meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,93 +17,173 @@
package rawdb

import (
"errors"
"io"
"math"
"os"

"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
)

const freezerVersion = 1 // The initial version tag of freezer table metadata
const (
freezerTableV1 = 1 // Initial version of metadata struct
freezerTableV2 = 2 // Add field: 'flushOffset'
freezerVersion = freezerTableV2 // The current used version
)

// freezerTableMeta wraps all the metadata of the freezer table.
// freezerTableMeta is a collection of additional properties that describe the
// freezer table. These properties are designed with error resilience, allowing
// them to be automatically corrected after an error occurs without significantly
// impacting overall correctness.
type freezerTableMeta struct {
// Version is the versioning descriptor of the freezer table.
Version uint16
file *os.File // file handler of metadata
version uint16 // version descriptor of the freezer table

// VirtualTail indicates how many items have been marked as deleted.
// Its value is equal to the number of items removed from the table
// plus the number of items hidden in the table, so it should never
// be lower than the "actual tail".
VirtualTail uint64
}
// virtualTail represents the number of items marked as deleted. It is
// calculated as the sum of items removed from the table and the items
// hidden within the table, and should never be less than the "actual
// tail".
//
// If lost due to a crash or other reasons, it will be reset to the number
// of items deleted from the table, causing the previously hidden items
// to become visible, which is an acceptable consequence.
virtualTail uint64

// newMetadata initializes the metadata object with the given virtual tail.
func newMetadata(tail uint64) *freezerTableMeta {
return &freezerTableMeta{
Version: freezerVersion,
VirtualTail: tail,
}
// flushOffset represents the offset in the index file up to which the index
// items along with the corresponding data items in data files has been flushed
// (fsync’d) to disk. Beyond this offset, data integrity is not guaranteed,
// the extra index items along with the associated data items should be removed
// during the startup.
//
// The principle is that all data items above the flush offset are considered
// volatile and should be recoverable if they are discarded after the unclean
// shutdown. If data integrity is required, manually force a sync of the
// freezer before proceeding with further operations (e.g. do freezer.Sync()
// first and then write data to key value store in some circumstances).
//
// The offset could be moved forward by applying sync operation, or be moved
// backward in cases of head/tail truncation, etc.
flushOffset int64
}

// readMetadata reads the metadata of the freezer table from the
// given metadata file.
func readMetadata(file *os.File) (*freezerTableMeta, error) {
// decodeV1 attempts to decode the metadata structure in v1 format. If fails or
// the result is incompatible, nil is returned.
func decodeV1(file *os.File) *freezerTableMeta {
_, err := file.Seek(0, io.SeekStart)
if err != nil {
return nil, err
return nil
}
var meta freezerTableMeta
if err := rlp.Decode(file, &meta); err != nil {
return nil, err
type obj struct {
Version uint16
Tail uint64
}
var o obj
if err := rlp.Decode(file, &o); err != nil {
return nil
}
if o.Version != freezerTableV1 {
return nil
}
return &freezerTableMeta{
file: file,
version: o.Version,
virtualTail: o.Tail,
}
return &meta, nil
}

// writeMetadata writes the metadata of the freezer table into the
// given metadata file.
func writeMetadata(file *os.File, meta *freezerTableMeta) error {
// decodeV2 attempts to decode the metadata structure in v2 format. If fails or
// the result is incompatible, nil is returned.
func decodeV2(file *os.File) *freezerTableMeta {
_, err := file.Seek(0, io.SeekStart)
if err != nil {
return err
return nil
}
type obj struct {
Version uint16
Tail uint64
Offset uint64
}
var o obj
if err := rlp.Decode(file, &o); err != nil {
return nil
}
if o.Version != freezerTableV2 {
return nil
}
if o.Offset > math.MaxInt64 {
log.Error("Invalid flushOffset %d in freezer metadata", o.Offset, "file", file.Name())
return nil
}
return &freezerTableMeta{
file: file,
version: freezerTableV2,
virtualTail: o.Tail,
flushOffset: int64(o.Offset),
}
return rlp.Encode(file, meta)
}

// loadMetadata loads the metadata from the given metadata file.
// Initializes the metadata file with the given "actual tail" if
// it's empty.
func loadMetadata(file *os.File, tail uint64) (*freezerTableMeta, error) {
// newMetadata initializes the metadata object, either by loading it from the file
// or by constructing a new one from scratch.
func newMetadata(file *os.File) (*freezerTableMeta, error) {
stat, err := file.Stat()
if err != nil {
return nil, err
}
// Write the metadata with the given actual tail into metadata file
// if it's non-existent. There are two possible scenarios here:
// - the freezer table is empty
// - the freezer table is legacy
// In both cases, write the meta into the file with the actual tail
// as the virtual tail.
if stat.Size() == 0 {
m := newMetadata(tail)
if err := writeMetadata(file, m); err != nil {
m := &freezerTableMeta{
file: file,
version: freezerTableV2,
virtualTail: 0,
flushOffset: 0,
}
if err := m.write(true); err != nil {
return nil, err
}
return m, nil
}
m, err := readMetadata(file)
if m := decodeV2(file); m != nil {
return m, nil
}
if m := decodeV1(file); m != nil {
return m, nil // legacy metadata
}
return nil, errors.New("failed to decode metadata")
}

// setVirtualTail sets the virtual tail and flushes the metadata if sync is true.
func (m *freezerTableMeta) setVirtualTail(tail uint64, sync bool) error {
m.virtualTail = tail
return m.write(sync)
}

// setFlushOffset sets the flush offset and flushes the metadata if sync is true.
func (m *freezerTableMeta) setFlushOffset(offset int64, sync bool) error {
m.flushOffset = offset
return m.write(sync)
}

// write flushes the content of metadata into file and performs a fsync if required.
func (m *freezerTableMeta) write(sync bool) error {
type obj struct {
Version uint16
Tail uint64
Offset uint64
}
var o obj
o.Version = freezerVersion // forcibly use the current version
o.Tail = m.virtualTail
o.Offset = uint64(m.flushOffset)

_, err := m.file.Seek(0, io.SeekStart)
if err != nil {
return nil, err
return err
}
// Update the virtual tail with the given actual tail if it's even
// lower than it. Theoretically it shouldn't happen at all, print
// a warning here.
if m.VirtualTail < tail {
log.Warn("Updated virtual tail", "have", m.VirtualTail, "now", tail)
m.VirtualTail = tail
if err := writeMetadata(file, m); err != nil {
return nil, err
}
if err := rlp.Encode(m.file, &o); err != nil {
return err
}
if !sync {
return nil
}
return m, nil
return m.file.Sync()
}
Loading

0 comments on commit 0ad0966

Please sign in to comment.