Skip to content

Commit 9caec06

Browse files
authored
feat(server/v2/cometbft): optimistic execution (#22560)
Co-authored-by: Randy Grok <@faulttolerance.net>
1 parent ca48cef commit 9caec06

File tree

5 files changed

+416
-46
lines changed

5 files changed

+416
-46
lines changed

server/v2/cometbft/abci.go

+112-37
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
abci "github.com/cometbft/cometbft/abci/types"
1212
abciproto "github.com/cometbft/cometbft/api/cometbft/abci/v1"
1313
gogoproto "github.com/cosmos/gogoproto/proto"
14-
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
14+
"google.golang.org/protobuf/reflect/protoreflect"
1515
"google.golang.org/protobuf/reflect/protoregistry"
1616

1717
"cosmossdk.io/collections"
@@ -28,6 +28,7 @@ import (
2828
"cosmossdk.io/server/v2/appmanager"
2929
"cosmossdk.io/server/v2/cometbft/handlers"
3030
"cosmossdk.io/server/v2/cometbft/mempool"
31+
"cosmossdk.io/server/v2/cometbft/oe"
3132
"cosmossdk.io/server/v2/cometbft/types"
3233
cometerrors "cosmossdk.io/server/v2/cometbft/types/errors"
3334
"cosmossdk.io/server/v2/streaming"
@@ -77,6 +78,11 @@ type consensus[T transaction.Tx] struct {
7778
extendVote handlers.ExtendVoteHandler
7879
checkTxHandler handlers.CheckTxHandler[T]
7980

81+
// optimisticExec contains the context required for Optimistic Execution,
82+
// including the goroutine handling. This is experimental and must be enabled
83+
// by developers.
84+
optimisticExec *oe.OptimisticExecution[T]
85+
8086
addrPeerFilter types.PeerFilter // filter peers by address and port
8187
idPeerFilter types.PeerFilter // filter peers by node ID
8288

@@ -385,6 +391,14 @@ func (c *consensus[T]) PrepareProposal(
385391
return nil, errors.New("no prepare proposal function was set")
386392
}
387393

394+
// Abort any running OE so it cannot overlap with `PrepareProposal`. This could happen if optimistic
395+
// `internalFinalizeBlock` from previous round takes a long time, but consensus has moved on to next round.
396+
// Overlap is undesirable, since `internalFinalizeBlock` and `PrepareProposal` could share access to
397+
// in-memory structs depending on application implementation.
398+
// No-op if OE is not enabled.
399+
// Similar call to Abort() is done in `ProcessProposal`.
400+
c.optimisticExec.Abort()
401+
388402
ciCtx := contextWithCometInfo(ctx, comet.Info{
389403
Evidence: toCoreEvidence(req.Misbehavior),
390404
ValidatorsHash: req.NextValidatorsHash,
@@ -421,6 +435,16 @@ func (c *consensus[T]) ProcessProposal(
421435
return nil, errors.New("no process proposal function was set")
422436
}
423437

438+
// Since the application can get access to FinalizeBlock state and write to it,
439+
// we must be sure to reset it in case ProcessProposal times out and is called
440+
// again in a subsequent round. However, we only want to do this after we've
441+
// processed the first block, as we want to avoid overwriting the finalizeState
442+
// after state changes during InitChain.
443+
if req.Height > int64(c.initialHeight) {
444+
// abort any running OE
445+
c.optimisticExec.Abort()
446+
}
447+
424448
ciCtx := contextWithCometInfo(ctx, comet.Info{
425449
Evidence: toCoreEvidence(req.Misbehavior),
426450
ValidatorsHash: req.NextValidatorsHash,
@@ -436,6 +460,17 @@ func (c *consensus[T]) ProcessProposal(
436460
}, nil
437461
}
438462

463+
// Only execute optimistic execution if the proposal is accepted, OE is
464+
// enabled and the block height is greater than the initial height. During
465+
// the first block we'll be carrying state from InitChain, so it would be
466+
// impossible for us to easily revert.
467+
// After the first block has been processed, the next blocks will get executed
468+
// optimistically, so that when the ABCI client calls `FinalizeBlock` the app
469+
// can have a response ready.
470+
if req.Height > int64(c.initialHeight) {
471+
c.optimisticExec.Execute(req)
472+
}
473+
439474
return &abciproto.ProcessProposalResponse{
440475
Status: abciproto.PROCESS_PROPOSAL_STATUS_ACCEPT,
441476
}, nil
@@ -447,46 +482,40 @@ func (c *consensus[T]) FinalizeBlock(
447482
ctx context.Context,
448483
req *abciproto.FinalizeBlockRequest,
449484
) (*abciproto.FinalizeBlockResponse, error) {
450-
if err := c.validateFinalizeBlockHeight(req); err != nil {
451-
return nil, err
452-
}
453-
454-
if err := c.checkHalt(req.Height, req.Time); err != nil {
455-
return nil, err
456-
}
457-
458-
// TODO(tip): can we expect some txs to not decode? if so, what we do in this case? this does not seem to be the case,
459-
// considering that prepare and process always decode txs, assuming they're the ones providing txs we should never
460-
// have a tx that fails decoding.
461-
decodedTxs, err := decodeTxs(req.Txs, c.txCodec)
462-
if err != nil {
463-
return nil, err
464-
}
485+
var (
486+
resp *server.BlockResponse
487+
newState store.WriterMap
488+
decodedTxs []T
489+
err error
490+
)
491+
492+
if c.optimisticExec.Initialized() {
493+
// check if the hash we got is the same as the one we are executing
494+
aborted := c.optimisticExec.AbortIfNeeded(req.Hash)
495+
496+
// Wait for the OE to finish, regardless of whether it was aborted or not
497+
res, optimistErr := c.optimisticExec.WaitResult()
498+
499+
if !aborted {
500+
if res != nil {
501+
resp = res.Resp
502+
newState = res.StateChanges
503+
decodedTxs = res.DecodedTxs
504+
}
465505

466-
cid, err := c.store.LastCommitID()
467-
if err != nil {
468-
return nil, err
469-
}
506+
if optimistErr != nil {
507+
return nil, optimistErr
508+
}
509+
}
470510

471-
blockReq := &server.BlockRequest[T]{
472-
Height: uint64(req.Height),
473-
Time: req.Time,
474-
Hash: req.Hash,
475-
AppHash: cid.Hash,
476-
ChainId: c.chainID,
477-
Txs: decodedTxs,
511+
c.optimisticExec.Reset()
478512
}
479513

480-
ciCtx := contextWithCometInfo(ctx, comet.Info{
481-
Evidence: toCoreEvidence(req.Misbehavior),
482-
ValidatorsHash: req.NextValidatorsHash,
483-
ProposerAddress: req.ProposerAddress,
484-
LastCommit: toCoreCommitInfo(req.DecidedLastCommit),
485-
})
486-
487-
resp, newState, err := c.app.DeliverBlock(ciCtx, blockReq)
488-
if err != nil {
489-
return nil, err
514+
if resp == nil { // if we didn't run OE, run the normal finalize block
515+
resp, newState, decodedTxs, err = c.internalFinalizeBlock(ctx, req)
516+
if err != nil {
517+
return nil, err
518+
}
490519
}
491520

492521
// after we get the changeset we can produce the commit hash,
@@ -531,6 +560,52 @@ func (c *consensus[T]) FinalizeBlock(
531560
return finalizeBlockResponse(resp, cp, appHash, c.indexedEvents, c.cfg.AppTomlConfig.Trace)
532561
}
533562

563+
func (c *consensus[T]) internalFinalizeBlock(
564+
ctx context.Context,
565+
req *abciproto.FinalizeBlockRequest,
566+
) (*server.BlockResponse, store.WriterMap, []T, error) {
567+
if err := c.validateFinalizeBlockHeight(req); err != nil {
568+
return nil, nil, nil, err
569+
}
570+
571+
if err := c.checkHalt(req.Height, req.Time); err != nil {
572+
return nil, nil, nil, err
573+
}
574+
575+
// TODO(tip): can we expect some txs to not decode? if so, what we do in this case? this does not seem to be the case,
576+
// considering that prepare and process always decode txs, assuming they're the ones providing txs we should never
577+
// have a tx that fails decoding.
578+
decodedTxs, err := decodeTxs(req.Txs, c.txCodec)
579+
if err != nil {
580+
return nil, nil, nil, err
581+
}
582+
583+
cid, err := c.store.LastCommitID()
584+
if err != nil {
585+
return nil, nil, nil, err
586+
}
587+
588+
blockReq := &server.BlockRequest[T]{
589+
Height: uint64(req.Height),
590+
Time: req.Time,
591+
Hash: req.Hash,
592+
AppHash: cid.Hash,
593+
ChainId: c.chainID,
594+
Txs: decodedTxs,
595+
}
596+
597+
ciCtx := contextWithCometInfo(ctx, comet.Info{
598+
Evidence: toCoreEvidence(req.Misbehavior),
599+
ValidatorsHash: req.NextValidatorsHash,
600+
ProposerAddress: req.ProposerAddress,
601+
LastCommit: toCoreCommitInfo(req.DecidedLastCommit),
602+
})
603+
604+
resp, stateChanges, err := c.app.DeliverBlock(ciCtx, blockReq)
605+
606+
return resp, stateChanges, decodedTxs, err
607+
}
608+
534609
// Commit implements types.Application.
535610
// It is called by cometbft to notify the application that a block was committed.
536611
func (c *consensus[T]) Commit(ctx context.Context, _ *abciproto.CommitRequest) (*abciproto.CommitResponse, error) {

server/v2/cometbft/abci_test.go

+91-8
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,18 @@ package cometbft
22

33
import (
44
"context"
5+
"cosmossdk.io/core/server"
56
"crypto/sha256"
67
"encoding/json"
8+
"errors"
9+
abci "github.com/cometbft/cometbft/abci/types"
710
"io"
811
"strings"
912
"sync"
1013
"testing"
1114
"time"
1215

16+
"cosmossdk.io/server/v2/cometbft/oe"
1317
abciproto "github.com/cometbft/cometbft/api/cometbft/abci/v1"
1418
v1 "github.com/cometbft/cometbft/api/cometbft/types/v1"
1519
"github.com/cosmos/gogoproto/proto"
@@ -56,10 +60,10 @@ func getQueryRouterBuilder[T any, PT interface {
5660
*T
5761
proto.Message
5862
},
59-
U any, UT interface {
60-
*U
61-
proto.Message
62-
}](
63+
U any, UT interface {
64+
*U
65+
proto.Message
66+
}](
6367
t *testing.T,
6468
handler func(ctx context.Context, msg PT) (UT, error),
6569
) *stf.MsgRouterBuilder {
@@ -86,10 +90,10 @@ func getMsgRouterBuilder[T any, PT interface {
8690
*T
8791
transaction.Msg
8892
},
89-
U any, UT interface {
90-
*U
91-
transaction.Msg
92-
}](
93+
U any, UT interface {
94+
*U
95+
transaction.Msg
96+
}](
9397
t *testing.T,
9498
handler func(ctx context.Context, msg PT) (UT, error),
9599
) *stf.MsgRouterBuilder {
@@ -514,6 +518,12 @@ func TestConsensus_ProcessProposal(t *testing.T) {
514518
require.Error(t, err)
515519

516520
// NoOp handler
521+
// dummy optimistic execution
522+
optimisticMockFunc := func(context.Context, *abci.FinalizeBlockRequest) (*server.BlockResponse, store.WriterMap, []mock.Tx, error) {
523+
return nil, nil, nil, errors.New("test error")
524+
}
525+
c.optimisticExec = oe.NewOptimisticExecution[mock.Tx](log.NewNopLogger(), optimisticMockFunc)
526+
517527
c.processProposalHandler = DefaultServerOptions[mock.Tx]().ProcessProposalHandler
518528
_, err = c.ProcessProposal(context.Background(), &abciproto.ProcessProposalRequest{
519529
Height: 1,
@@ -724,3 +734,76 @@ func assertStoreLatestVersion(t *testing.T, store types.Store, target uint64) {
724734
require.NoError(t, err)
725735
require.Equal(t, target, commitInfo.Version)
726736
}
737+
738+
func TestOptimisticExecution(t *testing.T) {
739+
c := setUpConsensus(t, 100_000, mempool.NoOpMempool[mock.Tx]{})
740+
741+
// Set up handlers
742+
c.processProposalHandler = DefaultServerOptions[mock.Tx]().ProcessProposalHandler
743+
744+
// mock optimistic execution
745+
calledTimes := 0
746+
optimisticMockFunc := func(context.Context, *abci.FinalizeBlockRequest) (*server.BlockResponse, store.WriterMap, []mock.Tx, error) {
747+
calledTimes++
748+
return nil, nil, nil, errors.New("test error")
749+
}
750+
c.optimisticExec = oe.NewOptimisticExecution[mock.Tx](log.NewNopLogger(), optimisticMockFunc)
751+
752+
_, err := c.InitChain(context.Background(), &abciproto.InitChainRequest{
753+
Time: time.Now(),
754+
ChainId: "test",
755+
InitialHeight: 1,
756+
})
757+
require.NoError(t, err)
758+
759+
_, err = c.FinalizeBlock(context.Background(), &abciproto.FinalizeBlockRequest{
760+
Time: time.Now(),
761+
Height: 1,
762+
Txs: [][]byte{mockTx.Bytes()},
763+
Hash: emptyHash[:],
764+
})
765+
require.NoError(t, err)
766+
767+
theHash := sha256.Sum256([]byte("test"))
768+
ppReq := &abciproto.ProcessProposalRequest{
769+
Height: 2,
770+
Hash: theHash[:],
771+
Time: time.Now(),
772+
Txs: [][]byte{mockTx.Bytes()},
773+
}
774+
775+
// Start optimistic execution
776+
resp, err := c.ProcessProposal(context.Background(), ppReq)
777+
require.NoError(t, err)
778+
require.Equal(t, resp.Status, abciproto.PROCESS_PROPOSAL_STATUS_ACCEPT)
779+
780+
// Initialize FinalizeBlock with correct hash - should use optimistic result
781+
theHash = sha256.Sum256([]byte("test"))
782+
fbReq := &abciproto.FinalizeBlockRequest{
783+
Height: 2,
784+
Hash: theHash[:],
785+
Time: ppReq.Time,
786+
Txs: ppReq.Txs,
787+
}
788+
fbResp, err := c.FinalizeBlock(context.Background(), fbReq)
789+
require.Error(t, err)
790+
require.ErrorContains(t, err, "test error") // from optimisticMockFunc
791+
require.Equal(t, 1, calledTimes)
792+
793+
resp, err = c.ProcessProposal(context.Background(), ppReq)
794+
require.NoError(t, err)
795+
require.Equal(t, resp.Status, abciproto.PROCESS_PROPOSAL_STATUS_ACCEPT)
796+
797+
theWrongHash := sha256.Sum256([]byte("wrong_hash"))
798+
fbReq.Hash = theWrongHash[:]
799+
800+
// Initialize FinalizeBlock with wrong hash - should abort optimistic execution
801+
// Because it is aborted, the result comes from the normal execution
802+
fbResp, err = c.FinalizeBlock(context.Background(), fbReq)
803+
require.NotNil(t, fbResp)
804+
require.NoError(t, err)
805+
require.Equal(t, 2, calledTimes)
806+
807+
// Verify optimistic execution was reset
808+
require.False(t, c.optimisticExec.Initialized())
809+
}

0 commit comments

Comments
 (0)