Skip to content
Draft
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
1cf5e6c
feat: add tracing
aaronc Oct 29, 2025
a207f94
work on otel tracer impl
aaronc Oct 29, 2025
ee0bfb3
add basic baseapp tracing
aaronc Oct 29, 2025
d5f5ea4
latest WIP
aaronc Oct 30, 2025
47f83e8
add trace exporter setup
aaronc Oct 30, 2025
7fafce3
fixes
aaronc Oct 30, 2025
bdba035
simapp setup, make tracers wrap loggers
aaronc Oct 30, 2025
25e3135
add test setup
aaronc Oct 30, 2025
5c7e464
fix shutdown order
aaronc Oct 30, 2025
d71f7c1
block trace nesting
aaronc Oct 30, 2025
56b215a
update metrics config and instrumentation
aaronc Oct 30, 2025
f9ce55c
start adding otel metric config
aaronc Oct 31, 2025
3fff00f
migrate to pure otel setup
aaronc Oct 31, 2025
5077567
fixes
aaronc Oct 31, 2025
31536b6
add basic metrics
aaronc Oct 31, 2025
c922688
add telemetry shutdown hook
aaronc Oct 31, 2025
ed891cc
docs, cleanup
aaronc Oct 31, 2025
f685bd4
WIP on removing go-metrics
aaronc Oct 31, 2025
42da2f7
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/traci…
aaronc Oct 31, 2025
699f5d3
setup sim test flag
aaronc Oct 31, 2025
5df2460
integrate slog logging
aaronc Oct 31, 2025
1c84edb
update to use official env var
aaronc Oct 31, 2025
46e4bcb
add README.md
aaronc Nov 3, 2025
f0c3955
delete spaces
aaronc Nov 3, 2025
7dfb754
setup TestingMain
aaronc Nov 3, 2025
1ce344b
update suggested config in README.md
aaronc Nov 3, 2025
edbae92
add otel custom config options
aaronc Nov 3, 2025
0f8085a
add otel custom config options
aaronc Nov 3, 2025
03b6069
add more instrumentation
aaronc Nov 3, 2025
c4dbd07
remove pretty print
aaronc Nov 4, 2025
2a1dffd
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/traci…
aaronc Nov 6, 2025
7b3ee34
add hashicorp/go-metrics otel/compatibility layer
aaronc Nov 6, 2025
06e1245
bring back server telemetry config, revert unrelated changes
aaronc Nov 6, 2025
cf720f9
address review comments, remove timing histograms
aaronc Nov 6, 2025
743fdc9
support otel routing through legacy metrics config
aaronc Nov 6, 2025
5bfe192
go mod tidy, bring in additional config from iavlx branch
aaronc Nov 6, 2025
4f50709
revert unrelated changes
aaronc Nov 7, 2025
c3e781d
bug fix & better go-metric -> otel naming
aaronc Nov 7, 2025
6ddac1d
propagate context better and add some basic tracing for grpc queries
aaronc Nov 11, 2025
70e8fa8
basic log unification
aaronc Nov 11, 2025
2258c1f
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/traci…
aaronc Nov 13, 2025
0968a22
go mod tidy
aaronc Nov 13, 2025
6fb271d
update CHANGELOG.md, UPGRADING.md and remove example telemetry config…
aaronc Nov 13, 2025
90885bb
update CHANGELOG.md, UPGRADING.md
aaronc Nov 13, 2025
5e9c6ba
update docs
aaronc Nov 13, 2025
a1f27c8
revert any logging configuration, other than setting up the otel logg…
aaronc Nov 14, 2025
6c9cde1
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/traci…
aaronc Nov 14, 2025
75279fa
lint fix
aaronc Nov 14, 2025
10436c9
Update UPGRADING.md
aaronc Nov 14, 2025
b870b7c
re-add telemetry toml
technicallyty Nov 17, 2025
3b2440c
add deprecation notice to telemetry toml config
technicallyty Nov 18, 2025
2c20c90
remove legacy metrics init in opentelemetry init
technicallyty Nov 18, 2025
033e51d
Merge remote-tracking branch 'origin/main' into aaronc/tracing-metrics
technicallyty Nov 18, 2025
ebe7fd9
re-add changelog entries
technicallyty Nov 18, 2025
3a188c1
Merge remote-tracking branch 'origin/main' into aaronc/tracing-metrics
technicallyty Nov 19, 2025
9e09f53
changelog.md
technicallyty Nov 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ debug_container.log
*.synctex.gz
/x/genutil/config/priv_validator_key.json
/x/genutil/data/priv_validator_state.json
/.envrc
/.env
45 changes: 38 additions & 7 deletions baseapp/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
)

func (app *BaseApp) InitChain(req *abci.RequestInitChain) (*abci.ResponseInitChain, error) {
_, span := tracer.Start(context.Background(), "InitChain")
defer span.End()

if req.ChainId != app.chainID {
return nil, fmt.Errorf("invalid chain-id on InitChain; expected: %s, got: %s", app.chainID, req.ChainId)
}
Expand Down Expand Up @@ -152,7 +155,11 @@

// Query implements the ABCI interface. It delegates to CommitMultiStore if it
// implements Queryable.
func (app *BaseApp) Query(_ context.Context, req *abci.RequestQuery) (resp *abci.ResponseQuery, err error) {
func (app *BaseApp) Query(ctx context.Context, req *abci.RequestQuery) (resp *abci.ResponseQuery, err error) {
// TODO: propagate context with span into the sdk.Context used for queries
_, span := tracer.Start(ctx, "Query")
defer span.End()

// add panic recovery for all queries
//
// Ref: https://github.com/cosmos/cosmos-sdk/pull/8039
Expand All @@ -167,9 +174,9 @@
req.Height = app.LastBlockHeight()
}

telemetry.IncrCounter(1, "query", "count")

Check failure on line 177 in baseapp/abci.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.IncrCounter is deprecated: IncrCounter provides a wrapper functionality for emitting a counter metric with global labels (if any). (staticcheck)
telemetry.IncrCounter(1, "query", req.Path)

Check failure on line 178 in baseapp/abci.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.IncrCounter is deprecated: IncrCounter provides a wrapper functionality for emitting a counter metric with global labels (if any). (staticcheck)
defer telemetry.MeasureSince(telemetry.Now(), req.Path)

Check failure on line 179 in baseapp/abci.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.MeasureSince is deprecated: MeasureSince provides a wrapper functionality for emitting a time measure metric with global labels (if any). (staticcheck)

if req.Path == QueryPathBroadcastTx {
return sdkerrors.QueryResult(errorsmod.Wrap(sdkerrors.ErrInvalidRequest, "can't route a broadcast tx message"), app.trace), nil
Expand Down Expand Up @@ -342,6 +349,9 @@
// will contain relevant error information. Regardless of tx execution outcome,
// the ResponseCheckTx will contain the relevant gas execution context.
func (app *BaseApp) CheckTx(req *abci.RequestCheckTx) (*abci.ResponseCheckTx, error) {
_, span := tracer.Start(context.Background(), "CheckTx")
defer span.End()

var mode sdk.ExecMode

switch req.Type {
Expand Down Expand Up @@ -454,7 +464,10 @@
}
}()

resp, err = app.abciHandlers.PrepareProposalHandler(prepareProposalState.Context(), req)
ctx := prepareProposalState.Context()
ctx, span := ctx.StartSpan(tracer, "PrepareProposal")
defer span.End()
resp, err = app.abciHandlers.PrepareProposalHandler(ctx, req)
if err != nil {
app.logger.Error("failed to prepare proposal", "height", req.Height, "time", req.Time, "err", err)
return &abci.ResponsePrepareProposal{Txs: req.Txs}, nil
Expand Down Expand Up @@ -513,7 +526,10 @@
}

processProposalState := app.stateManager.GetState(execModeProcessProposal)
processProposalState.SetContext(app.getContextForProposal(processProposalState.Context(), req.Height).
ctx := processProposalState.Context()
ctx, span := ctx.StartSpan(tracer, "ProcessProposal")
defer span.End()
processProposalState.SetContext(app.getContextForProposal(ctx, req.Height).
WithVoteInfos(req.ProposedLastCommit.Votes). // this is a set of votes that are not finalized yet, wait for commit
WithBlockHeight(req.Height).
WithBlockTime(req.Time).
Expand Down Expand Up @@ -595,6 +611,9 @@
return nil, errors.New("application ExtendVote handler not set")
}

ctx, span := ctx.StartSpan(tracer, "ExtendVote")
defer span.End()

// If vote extensions are not enabled, as a safety precaution, we return an
// error.
cp := app.GetConsensusParams(ctx)
Expand Down Expand Up @@ -666,6 +685,9 @@
ctx = sdk.NewContext(ms, emptyHeader, false, app.logger).WithStreamingManager(app.streamingManager)
}

ctx, span := ctx.StartSpan(tracer, "VerifyVoteExtension")
defer span.End()

// If vote extensions are not enabled, as a safety precaution, we return an
// error.
cp := app.GetConsensusParams(ctx)
Expand Down Expand Up @@ -716,7 +738,7 @@
// Execution flow or by the FinalizeBlock ABCI method. The context received is
// only used to handle early cancellation, for anything related to state app.stateManager.GetState(execModeFinalize).Context()
// must be used.
func (app *BaseApp) internalFinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) {
func (app *BaseApp) internalFinalizeBlock(goCtx context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) {
var events []abci.Event

if err := app.checkHalt(req.Height, req.Time); err != nil {
Expand Down Expand Up @@ -750,9 +772,12 @@
app.stateManager.SetState(execModeFinalize, app.cms, header, app.logger, app.streamingManager)
finalizeState = app.stateManager.GetState(execModeFinalize)
}
ctx := finalizeState.Context().WithContext(goCtx)
ctx, span := ctx.StartSpan(tracer, "internalFinalizeBlock")
defer span.End()

// Context is now updated with Header information.
finalizeState.SetContext(finalizeState.Context().
finalizeState.SetContext(ctx.
WithBlockHeader(header).
WithHeaderHash(req.Hash).
WithHeaderInfo(coreheader.Info{
Expand Down Expand Up @@ -846,7 +871,7 @@
WithBlockGasUsed(blockGasUsed).
WithBlockGasWanted(blockGasWanted),
)
endBlock, err := app.endBlock(finalizeState.Context())
endBlock, err := app.endBlock()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -959,7 +984,11 @@
// height.
func (app *BaseApp) Commit() (*abci.ResponseCommit, error) {
finalizeState := app.stateManager.GetState(execModeFinalize)
header := finalizeState.Context().BlockHeader()
ctx := finalizeState.Context()
ctx, span := ctx.StartSpan(tracer, "Commit")
defer span.End()

header := ctx.BlockHeader()
retainHeight := app.GetBlockRetentionHeight(header.Height)

if app.abciHandlers.Precommiter != nil {
Expand Down Expand Up @@ -1005,6 +1034,8 @@
// The SnapshotIfApplicable method will create the snapshot by starting the goroutine
app.snapshotManager.SnapshotIfApplicable(header.Height)

blockCnt.Add(ctx, 1)

return resp, nil
}

Expand Down
63 changes: 57 additions & 6 deletions baseapp/baseapp.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package baseapp

import (
"context"
"fmt"
"maps"
"math"
Expand All @@ -15,22 +14,28 @@
cmtproto "github.com/cometbft/cometbft/proto/tendermint/types"
dbm "github.com/cosmos/cosmos-db"
"github.com/cosmos/gogoproto/proto"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/trace"
protov2 "google.golang.org/protobuf/proto"

errorsmod "cosmossdk.io/errors"

Check failure on line 23 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / Analyze

File is not properly formatted (gci)

Check failure on line 23 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

File is not properly formatted (gci)
"cosmossdk.io/log"
"cosmossdk.io/store"
storemetrics "cosmossdk.io/store/metrics"
"cosmossdk.io/store/snapshots"
storetypes "cosmossdk.io/store/types"

"cosmossdk.io/log"

"github.com/cosmos/cosmos-sdk/baseapp/config"
"github.com/cosmos/cosmos-sdk/baseapp/oe"
"github.com/cosmos/cosmos-sdk/baseapp/state"
"github.com/cosmos/cosmos-sdk/codec"
codectypes "github.com/cosmos/cosmos-sdk/codec/types"
servertypes "github.com/cosmos/cosmos-sdk/server/types"
"github.com/cosmos/cosmos-sdk/telemetry"

Check failure on line 37 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

ST1019: package "github.com/cosmos/cosmos-sdk/telemetry" is being imported more than once (staticcheck)
_ "github.com/cosmos/cosmos-sdk/telemetry" // need to initialize telemetry before we declare tracer and metrics

Check failure on line 38 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

ST1019(related information): other import of "github.com/cosmos/cosmos-sdk/telemetry" (staticcheck)
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
"github.com/cosmos/cosmos-sdk/types/mempool"
Expand Down Expand Up @@ -59,6 +64,25 @@

var _ servertypes.ABCI = (*BaseApp)(nil)

// Package-level OpenTelemetry instrumentation handles for baseapp.
var (
// tracer starts the spans created in this package; it is obtained from
// otel.Tracer under the instrumentation name "cosmos-sdk/baseapp".
tracer = otel.Tracer("cosmos-sdk/baseapp")
// meter is obtained from otel.Meter under the same instrumentation name and
// is used by init to create the counters declared below.
meter = otel.Meter("cosmos-sdk/baseapp")
// blockCnt is the "block.count" counter; it is assigned in init.
blockCnt metric.Int64Counter
// txCnt is the "tx.count" counter; it is assigned in init.
txCnt metric.Int64Counter
)

// init creates the package-level counters on the baseapp meter. If either
// instrument cannot be created, the error is raised as a panic so the failure
// surfaces at package initialization.
func init() {
	var err error

	// Counter registered under "block.count".
	if blockCnt, err = meter.Int64Counter("block.count"); err != nil {
		panic(err)
	}

	// Counter registered under "tx.count".
	if txCnt, err = meter.Int64Counter("tx.count"); err != nil {
		panic(err)
	}
}

// BaseApp reflects the ABCI application implementation.
type BaseApp struct {
// initialized on creation
Expand Down Expand Up @@ -186,6 +210,8 @@
gasConfig: config.GasConfig{QueryGasLimit: math.MaxUint64},
}

// initialize tracer

for _, option := range options {
option(app)
}
Expand Down Expand Up @@ -656,6 +682,8 @@
if app.abciHandlers.PreBlocker != nil {
finalizeState := app.stateManager.GetState(execModeFinalize)
ctx := finalizeState.Context().WithEventManager(sdk.NewEventManager())
ctx, span := ctx.StartSpan(tracer, "preBlock")
defer span.End()
rsp, err := app.abciHandlers.PreBlocker(ctx, req)
if err != nil {
return nil, err
Expand All @@ -681,7 +709,10 @@
)

if app.abciHandlers.BeginBlocker != nil {
resp, err = app.abciHandlers.BeginBlocker(app.stateManager.GetState(execModeFinalize).Context())
ctx := app.stateManager.GetState(execModeFinalize).Context()
ctx, span := ctx.StartSpan(tracer, "beginBlock")
defer span.End()
resp, err = app.abciHandlers.BeginBlocker(ctx)
if err != nil {
return resp, err
}
Expand All @@ -707,10 +738,10 @@
var resp *abci.ExecTxResult

defer func() {
telemetry.IncrCounter(1, "tx", "count")

Check failure on line 741 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.IncrCounter is deprecated: IncrCounter provides a wrapper functionality for emitting a counter metric with global labels (if any). (staticcheck)
telemetry.IncrCounter(1, "tx", resultStr)

Check failure on line 742 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.IncrCounter is deprecated: IncrCounter provides a wrapper functionality for emitting a counter metric with global labels (if any). (staticcheck)
telemetry.SetGauge(float32(gInfo.GasUsed), "tx", "gas", "used")

Check failure on line 743 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.SetGauge is deprecated: SetGauge provides a wrapper functionality for emitting a gauge metric with global labels (if any). (staticcheck)
telemetry.SetGauge(float32(gInfo.GasWanted), "tx", "gas", "wanted")

Check failure on line 744 in baseapp/baseapp.go

View workflow job for this annotation

GitHub Actions / golangci-lint

SA1019: telemetry.SetGauge is deprecated: SetGauge provides a wrapper functionality for emitting a gauge metric with global labels (if any). (staticcheck)
}()

gInfo, result, anteEvents, err := app.RunTx(execModeFinalize, tx, nil, txIndex, txMultiStore, incarnationCache)
Expand Down Expand Up @@ -739,11 +770,14 @@

// endBlock is an application-defined function that is called after transactions
// have been processed in FinalizeBlock.
func (app *BaseApp) endBlock(_ context.Context) (sdk.EndBlock, error) {
func (app *BaseApp) endBlock() (sdk.EndBlock, error) {
var endblock sdk.EndBlock

if app.abciHandlers.EndBlocker != nil {
eb, err := app.abciHandlers.EndBlocker(app.stateManager.GetState(execModeFinalize).Context())
ctx := app.stateManager.GetState(execModeFinalize).Context()
ctx, span := ctx.StartSpan(tracer, "endBlock")
defer span.End()
eb, err := app.abciHandlers.EndBlocker(ctx)
if err != nil {
return endblock, err
}
Expand Down Expand Up @@ -773,12 +807,15 @@
// both txbytes and the decoded tx are passed to runTx to avoid the state machine encoding the tx and decoding the transaction twice
// passing the decoded tx to runTX is optional, it will be decoded if the tx is nil
func (app *BaseApp) RunTx(mode sdk.ExecMode, txBytes []byte, tx sdk.Tx, txIndex int, txMultiStore storetypes.MultiStore, incarnationCache map[string]any) (gInfo sdk.GasInfo, result *sdk.Result, anteEvents []abci.Event, err error) {
ctx := app.getContextForTx(mode, txBytes, txIndex)
ctx, span := ctx.StartSpan(tracer, "runTx")
defer span.End()

// NOTE: GasWanted should be returned by the AnteHandler. GasUsed is
// determined by the GasMeter. We need access to the context to get the gas
// meter, so we initialize upfront.
var gasWanted uint64

ctx := app.getContextForTx(mode, txBytes, txIndex)
if incarnationCache != nil {
ctx = ctx.WithIncarnationCache(incarnationCache)
}
Expand Down Expand Up @@ -861,7 +898,9 @@
// performance benefits, but it'll be more difficult to get right.
anteCtx, msCache = app.cacheTxContext(ctx, txBytes)
anteCtx = anteCtx.WithEventManager(sdk.NewEventManager())
anteCtx, anteSpan := anteCtx.StartSpan(tracer, "anteHandler")
newCtx, err := app.anteHandler(anteCtx, tx, mode == execModeSimulate)
anteSpan.End()

if !newCtx.IsZero() {
// At this point, newCtx.MultiStore() is a store branch, or something else
Expand Down Expand Up @@ -951,6 +990,8 @@
consumeBlockGas()

msCache.Write()

txCnt.Add(ctx, 1)
}

if len(anteEvents) > 0 && (mode == execModeFinalize || mode == execModeSimulate) {
Expand All @@ -968,6 +1009,9 @@
// Handler does not exist for a given message route. Otherwise, a reference to a
// Result is returned. The caller must not commit state if an error is returned.
func (app *BaseApp) runMsgs(ctx sdk.Context, msgs []sdk.Msg, msgsV2 []protov2.Message, mode sdk.ExecMode) (*sdk.Result, error) {
ctx, span := ctx.StartSpan(tracer, "runMsgs")
defer span.End()

events := sdk.EmptyEvents()
var msgResponses []*codectypes.Any

Expand All @@ -984,11 +1028,18 @@
return nil, errorsmod.Wrapf(sdkerrors.ErrUnknownRequest, "no message handler found for %T", msg)
}

ctx, msgSpan := ctx.StartSpan(tracer, "msgHandler",
trace.WithAttributes(
attribute.String("msg_type", sdk.MsgTypeURL(msg)),
attribute.Int("msg_index", i),
),
)
// ADR 031 request type routing
msgResult, err := handler(ctx, msg)
if err != nil {
return nil, errorsmod.Wrapf(err, "failed to execute message; message index: %d", i)
}
msgSpan.End()

// create message events
msgEvents, err := createEvents(app.cdc, msgResult.GetEvents(), msg, msgsV2[i])
Expand Down
Loading
Loading