Skip to content

Commit 8c6ce3a

Browse files
authored
funder: prometheus metrics (#869)
## Summary of Changes

- Expose a Prometheus `/metrics` endpoint on the funder.
- Emit the following metrics:
  - `doublezero_funder_build_info` - Build information of the funder agent (version, commit, date)
  - `doublezero_funder_errors_total` - Number of errors (counter), with an `error_type` label to distinguish them
  - `doublezero_funder_account_balance_sol` - Funder account balance in SOL (gauge), for alerting when funds are low
- Resolves #856
- Related to #742

## Testing Verification

- Update the funder e2e test to check the errors and balance metrics.
1 parent 7284867 commit 8c6ce3a

File tree

10 files changed

+283
-167
lines changed

10 files changed

+283
-167
lines changed

controlplane/funder/cmd/main.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"flag"
66
"fmt"
77
"log/slog"
8+
"net"
9+
"net/http"
810
"os"
911
"os/signal"
1012
"syscall"
@@ -13,7 +15,9 @@ import (
1315
"github.com/gagliardetto/solana-go"
1416
solanarpc "github.com/gagliardetto/solana-go/rpc"
1517
"github.com/malbeclabs/doublezero/controlplane/funder/internal/funder"
18+
"github.com/malbeclabs/doublezero/controlplane/funder/internal/metrics"
1619
"github.com/malbeclabs/doublezero/smartcontract/sdk/go/serviceability"
20+
"github.com/prometheus/client_golang/prometheus/promhttp"
1721
)
1822

1923
const (
@@ -31,6 +35,8 @@ var (
3135
topUpSOL = flag.Float64("top-up-sol", defaultTopUpSOL, "the amount of SOL to top up the funder with")
3236
verbose = flag.Bool("verbose", false, "enable verbose logging")
3337
showVersion = flag.Bool("version", false, "Print the version of the doublezero-agent and exit")
38+
metricsEnable = flag.Bool("metrics-enable", false, "Enable prometheus metrics")
39+
metricsAddr = flag.String("metrics-addr", ":8080", "Address to listen on for prometheus metrics")
3440

3541
// Set by LDFLAGS
3642
version = "dev"
@@ -72,6 +78,23 @@ func main() {
7278
os.Exit(1)
7379
}
7480

81+
// Set up prometheus metrics server if enabled.
82+
if *metricsEnable {
83+
metrics.BuildInfo.WithLabelValues(version, commit, date).Set(1)
84+
go func() {
85+
listener, err := net.Listen("tcp", *metricsAddr)
86+
if err != nil {
87+
log.Error("Failed to start prometheus metrics server listener", "error", err)
88+
return
89+
}
90+
log.Info("Prometheus metrics server listening", "address", listener.Addr())
91+
http.Handle("/metrics", promhttp.Handler())
92+
if err := http.Serve(listener, nil); err != nil {
93+
log.Error("Failed to start prometheus metrics server", "error", err)
94+
}
95+
}()
96+
}
97+
7598
// Check that keypair path exists.
7699
if _, err := os.Stat(*keypairPath); os.IsNotExist(err) {
77100
log.Error("Funder keypair does not exist", "path", *keypairPath)

controlplane/funder/internal/funder/config.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ var (
2323
)
2424

2525
const (
26-
defaultWaitForBalanceTimeout = 20 * time.Second
26+
defaultWaitForBalanceTimeout = 60 * time.Second
2727
defaultWaitForBalancePollInterval = 1 * time.Second
2828
)
2929

@@ -40,6 +40,14 @@ type Config struct {
4040
WaitForBalancePollInterval time.Duration
4141
}
4242

43+
func (c *Config) MinBalanceLamports() uint64 {
44+
return uint64(c.MinBalanceSOL * float64(solana.LAMPORTS_PER_SOL))
45+
}
46+
47+
func (c *Config) TopUpLamports() uint64 {
48+
return uint64(c.TopUpSOL * float64(solana.LAMPORTS_PER_SOL))
49+
}
50+
4351
func (c *Config) Validate() error {
4452
if c.Logger == nil {
4553
return ErrLoggerRequired

controlplane/funder/internal/funder/funder.go

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/gagliardetto/solana-go"
1010
"github.com/gagliardetto/solana-go/programs/system"
1111
solanarpc "github.com/gagliardetto/solana-go/rpc"
12+
"github.com/malbeclabs/doublezero/controlplane/funder/internal/metrics"
1213
)
1314

1415
type Funder struct {
@@ -27,11 +28,14 @@ func New(cfg Config) (*Funder, error) {
2728
}
2829

2930
func (f *Funder) Run(ctx context.Context) error {
31+
minBalanceLamports := f.cfg.MinBalanceLamports()
32+
topUpLamports := f.cfg.TopUpLamports()
33+
3034
f.log.Info("Starting funder",
3135
"interval", f.cfg.Interval,
3236
"signer", f.cfg.Signer.PublicKey(),
33-
"minBalanceSOL", f.cfg.MinBalanceSOL,
34-
"topUpSOL", f.cfg.TopUpSOL,
37+
"minBalanceLamports", minBalanceLamports,
38+
"topUpLamports", topUpLamports,
3539
"serviceabilityProgramID", f.cfg.Serviceability.ProgramID(),
3640
)
3741

@@ -47,21 +51,26 @@ func (f *Funder) Run(ctx context.Context) error {
4751
err := f.cfg.Serviceability.Load(ctx)
4852
if err != nil {
4953
f.log.Error("Failed to load serviceability state", "error", err)
54+
metrics.Errors.WithLabelValues(metrics.ErrorTypeLoadServiceabilityState).Inc()
5055
continue
5156
}
5257

5358
// Check balance of funder.
5459
balance, err := f.cfg.Solana.GetBalance(ctx, f.cfg.Signer.PublicKey(), solanarpc.CommitmentFinalized)
5560
if err != nil {
5661
f.log.Error("Failed to get balance", "error", err)
62+
metrics.Errors.WithLabelValues(metrics.ErrorTypeGetFunderAccountBalance).Inc()
5763
continue
5864
}
5965
balanceLamports := balance.Value
60-
f.log.Debug("Funder balance", "balance", balanceLamports)
66+
f.log.Debug("Funder balance", "account", f.cfg.Signer.PublicKey(), "balanceLamports", balanceLamports)
67+
balanceSOL := float64(balanceLamports) / float64(solana.LAMPORTS_PER_SOL)
68+
metrics.FunderAccountBalanceSOL.WithLabelValues(f.cfg.Signer.PublicKey().String()).Set(balanceSOL)
6169

6270
// Check that we have enough SOL to top up metrics publishers.
63-
if balanceLamports < uint64(f.cfg.TopUpSOL*float64(solana.LAMPORTS_PER_SOL)) {
64-
f.log.Error("Funder balance is below minimum", "balance", balanceLamports, "minBalance", f.cfg.TopUpSOL)
71+
if balanceLamports < topUpLamports {
72+
f.log.Error("Funder balance is below minimum", "balanceLamports", balanceLamports, "minBalanceLamports", minBalanceLamports)
73+
metrics.Errors.WithLabelValues(metrics.ErrorTypeFunderAccountBalanceBelowMinimum).Inc()
6574
continue
6675
}
6776

@@ -80,36 +89,40 @@ func (f *Funder) Run(ctx context.Context) error {
8089
balance, err := f.cfg.Solana.GetBalance(ctx, metricsPublisherPK, solanarpc.CommitmentFinalized)
8190
if err != nil {
8291
f.log.Error("Failed to get balance", "error", err)
92+
metrics.Errors.WithLabelValues(metrics.ErrorTypeGetMetricsPublisherAccountBalance).Inc()
8393
continue
8494
}
8595
balanceLamports := balance.Value
86-
f.log.Debug("Metrics publisher balance", "device", devicePK, "metricsPublisher", metricsPublisherPK, "balance", balanceLamports, "minBalance", f.cfg.MinBalanceSOL)
96+
f.log.Debug("Metrics publisher balance", "device", devicePK, "metricsPublisher", metricsPublisherPK, "balanceLamports", balanceLamports, "minBalanceLamports", minBalanceLamports)
8797

8898
// If balance is below minimum, top it up.
89-
if balanceLamports < uint64(f.cfg.MinBalanceSOL*float64(solana.LAMPORTS_PER_SOL)) {
90-
f.log.Info("Topping up metrics publisher", "device", devicePK, "metricsPublisher", metricsPublisherPK, "balance", balanceLamports, "topUp", f.cfg.TopUpSOL)
99+
if balanceLamports < minBalanceLamports {
100+
f.log.Info("Topping up metrics publisher", "device", devicePK, "metricsPublisher", metricsPublisherPK, "balanceLamports", balanceLamports, "topUpLamports", topUpLamports)
91101

92-
_, err := transferFunds(ctx, f.cfg.Solana, f.cfg.Signer, metricsPublisherPK, uint64(f.cfg.TopUpSOL*float64(solana.LAMPORTS_PER_SOL)), nil)
102+
_, err := transferFunds(ctx, f.cfg.Solana, f.cfg.Signer, metricsPublisherPK, topUpLamports, nil)
93103
if err != nil {
94104
f.log.Error("Failed to transfer SOL", "error", err)
105+
metrics.Errors.WithLabelValues(metrics.ErrorTypeTransferFundsToMetricsPublisher).Inc()
95106
continue
96107
}
97108

98109
// Wait for the transfer to complete.
99-
err = waitForBalance(ctx, f.cfg.Solana, metricsPublisherPK, f.cfg.MinBalanceSOL, f.cfg.WaitForBalanceTimeout, f.cfg.WaitForBalancePollInterval)
110+
f.log.Debug("Waiting for balance", "account", metricsPublisherPK, "expected", minBalanceLamports, "current", balanceLamports)
111+
err = waitForBalance(ctx, f.cfg.Solana, metricsPublisherPK, minBalanceLamports, f.cfg.WaitForBalanceTimeout, f.cfg.WaitForBalancePollInterval)
100112
if err != nil {
101113
f.log.Error("Failed to wait for balance", "error", err)
114+
metrics.Errors.WithLabelValues(metrics.ErrorTypeWaitForMetricsPublisherBalance).Inc()
102115
continue
103116
}
104117

105-
f.log.Info("Transferred SOL to metrics publisher", "device", devicePK, "metricsPublisher", metricsPublisherPK, "amount", f.cfg.TopUpSOL)
118+
f.log.Info("Transferred SOL to metrics publisher", "device", devicePK, "metricsPublisher", metricsPublisherPK, "topUpLamports", topUpLamports)
106119
}
107120
}
108121
}
109122
}
110123
}
111124

112-
func waitForBalance(ctx context.Context, client SolanaClient, account solana.PublicKey, minBalanceSOL float64, timeout time.Duration, pollInterval time.Duration) error {
125+
func waitForBalance(ctx context.Context, client SolanaClient, account solana.PublicKey, minBalanceLamports uint64, timeout time.Duration, pollInterval time.Duration) error {
113126
timer := time.NewTimer(timeout)
114127
defer timer.Stop()
115128

@@ -119,15 +132,15 @@ func waitForBalance(ctx context.Context, client SolanaClient, account solana.Pub
119132
return fmt.Errorf("failed to get balance: %w", err)
120133
}
121134
balanceLamports := balance.Value
122-
if balanceLamports >= uint64(minBalanceSOL*float64(solana.LAMPORTS_PER_SOL)) {
135+
if balanceLamports >= minBalanceLamports {
123136
return nil
124137
}
125138

126139
select {
127140
case <-ctx.Done():
128141
return ctx.Err()
129142
case <-timer.C:
130-
return fmt.Errorf("timeout waiting for balance: account=%s, expected balance=%.2f SOL", account, minBalanceSOL)
143+
return fmt.Errorf("timeout waiting for balance: account=%s, expected balance=%d, current balance=%d", account, minBalanceLamports, balanceLamports)
131144
case <-time.After(pollInterval):
132145
// continue polling
133146
}
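controlplane/funder/internal/metrics/ (new file in package `metrics`; file name not shown in this capture)

Lines changed: 54 additions & 0 deletions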
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
"github.com/prometheus/client_golang/prometheus/promauto"
6+
)
7+
8+
const (
9+
// Metrics names.
10+
MetricNameBuildInfo = "doublezero_funder_build_info"
11+
MetricNameErrors = "doublezero_funder_errors_total"
12+
MetricNameFunderAccountBalanceSOL = "doublezero_funder_account_balance_sol"
13+
14+
// Labels.
15+
LabelVersion = "version"
16+
LabelCommit = "commit"
17+
LabelDate = "date"
18+
LabelErrorType = "error_type"
19+
LabelFunderAccount = "funder_account"
20+
21+
// Error types.
22+
ErrorTypeLoadServiceabilityState = "load_serviceability_state"
23+
ErrorTypeGetFunderAccountBalance = "get_funder_account_balance"
24+
ErrorTypeFunderAccountBalanceBelowMinimum = "funder_account_balance_below_minimum"
25+
ErrorTypeGetMetricsPublisherAccountBalance = "get_metrics_publisher_account_balance"
26+
ErrorTypeTransferFundsToMetricsPublisher = "transfer_funds_to_metrics_publisher"
27+
ErrorTypeWaitForMetricsPublisherBalance = "wait_for_metrics_publisher_balance"
28+
)
29+
30+
var (
31+
BuildInfo = promauto.NewGaugeVec(
32+
prometheus.GaugeOpts{
33+
Name: MetricNameBuildInfo,
34+
Help: "Build information of the funder agent",
35+
},
36+
[]string{LabelVersion, LabelCommit, LabelDate},
37+
)
38+
39+
Errors = promauto.NewCounterVec(
40+
prometheus.CounterOpts{
41+
Name: MetricNameErrors,
42+
Help: "Number of errors encountered",
43+
},
44+
[]string{LabelErrorType},
45+
)
46+
47+
FunderAccountBalanceSOL = promauto.NewGaugeVec(
48+
prometheus.GaugeOpts{
49+
Name: MetricNameFunderAccountBalanceSOL,
50+
Help: "The balance of the funder account in SOL",
51+
},
52+
[]string{LabelFunderAccount},
53+
)
54+
)

e2e/funder_test.go

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package e2e_test
44

55
import (
6+
"fmt"
67
"log/slog"
78
"os"
89
"path/filepath"
@@ -59,6 +60,24 @@ func TestE2E_Funder(t *testing.T) {
5960
require.NoError(t, err)
6061
funderPK := funderPrivateKey.PublicKey()
6162

63+
// Check that the errors metric only contains "funder_account_balance_below_minimum" errors,
64+
// which occur on startup while waiting for the manager/funder account to be funded.
65+
metricsClient := dn.Funder.GetMetricsClient()
66+
require.NoError(t, metricsClient.Fetch(ctx))
67+
errors := metricsClient.GetCounterValues("doublezero_funder_errors_total")
68+
require.NotNil(t, errors)
69+
require.Len(t, errors, 1)
70+
require.Equal(t, "funder_account_balance_below_minimum", errors[0].Labels["error_type"])
71+
prevFunderAccountBalanceBelowMinimumCount := int(errors[0].Value)
72+
73+
// Check the funder account balance metric.
74+
require.NoError(t, metricsClient.Fetch(ctx))
75+
funderBalance := metricsClient.GetGaugeValues("doublezero_funder_account_balance_sol")
76+
require.NotNil(t, funderBalance)
77+
// The funder account is the manager account, which we fund with 100 SOL during devnet setup.
78+
require.Greater(t, funderBalance[0].Value, 50.0)
79+
prevFunderBalance := funderBalance[0].Value
80+
6281
// Add a device onchain with metrics publisher pubkey.
6382
log.Debug("==> Creating LA device onchain")
6483
laDeviceMetricsPublisherWallet := solana.NewWallet()
@@ -84,20 +103,78 @@ func TestE2E_Funder(t *testing.T) {
84103
// Check that the metrics publisher pubkey is eventually funded.
85104
requireEventuallyFunded(t, log, rpcClient, nyDeviceMetricsPublisherWallet.PublicKey(), minBalanceSOL, "NY device metrics publisher")
86105

106+
// Check that the funder account balance is now lower.
107+
require.NoError(t, metricsClient.Fetch(ctx))
108+
funderBalance = metricsClient.GetGaugeValues("doublezero_funder_account_balance_sol")
109+
require.NotNil(t, funderBalance)
110+
require.Less(t, funderBalance[0].Value, prevFunderBalance)
111+
87112
// Drain current balance from the devices onchain.
88-
drainFunds(t, rpcClient, laDeviceMetricsPublisherWallet.PrivateKey, funderPK, 0.01)
89-
drainFunds(t, rpcClient, nyDeviceMetricsPublisherWallet.PrivateKey, funderPK, 0.01)
113+
drainWallet := solana.NewWallet()
114+
log.Debug("--> Draining LA device balance", "account", laDeviceMetricsPublisherWallet.PublicKey())
115+
drainFunds(t, rpcClient, laDeviceMetricsPublisherWallet.PrivateKey, drainWallet.PublicKey(), 0.01)
116+
log.Debug("--> Draining NY device balance", "account", nyDeviceMetricsPublisherWallet.PublicKey())
117+
drainFunds(t, rpcClient, nyDeviceMetricsPublisherWallet.PrivateKey, drainWallet.PublicKey(), 0.01)
90118

91119
// Check that the devices are eventually funded again.
120+
beforeFunderBalance := getBalance(t, rpcClient, funderPK)
92121
requireEventuallyFunded(t, log, rpcClient, laDeviceMetricsPublisherWallet.PublicKey(), minBalanceSOL, "LA device metrics publisher")
93122
requireEventuallyFunded(t, log, rpcClient, nyDeviceMetricsPublisherWallet.PublicKey(), minBalanceSOL, "NY device metrics publisher")
123+
124+
// Wait for the funder account balance to show the top up.
125+
require.Eventually(t, func() bool {
126+
funderBalance := getBalance(t, rpcClient, funderPK)
127+
return funderBalance <= beforeFunderBalance-2*topUpSOL
128+
}, 60*time.Second, 5*time.Second)
129+
130+
// Drain the funder account balance to near 0.
131+
log.Debug("--> Draining funder account balance", "account", funderPK)
132+
drainFunds(t, rpcClient, funderPrivateKey, drainWallet.PublicKey(), 0.01)
133+
134+
// Check that the errors metric for "funder_account_balance_below_minimum" eventually increases,
135+
// which occurs when the funder account balance is drained to below the minimum.
136+
require.Eventually(t, func() bool {
137+
require.NoError(t, metricsClient.Fetch(ctx))
138+
errors = metricsClient.GetCounterValues("doublezero_funder_errors_total")
139+
require.NotNil(t, errors)
140+
require.Len(t, errors, 1)
141+
require.Equal(t, "funder_account_balance_below_minimum", errors[0].Labels["error_type"])
142+
if int(errors[0].Value) > prevFunderAccountBalanceBelowMinimumCount {
143+
return true
144+
}
145+
log.Debug("--> Waiting for funder account balance below minimum error to increase", "account", funderPK, "prevCount", prevFunderAccountBalanceBelowMinimumCount, "currentCount", int(errors[0].Value))
146+
return false
147+
}, 60*time.Second, 5*time.Second)
148+
149+
// Check that the funder account balance gauge metric is now near 0.
150+
require.NoError(t, metricsClient.Fetch(ctx))
151+
funderBalance = metricsClient.GetGaugeValues("doublezero_funder_account_balance_sol")
152+
require.NotNil(t, funderBalance)
153+
require.LessOrEqual(t, funderBalance[0].Value, 0.01)
154+
155+
// Transfer the drained funds back to the funder account.
156+
expectedFunderBalance := drainFunds(t, rpcClient, drainWallet.PrivateKey, funderPrivateKey.PublicKey(), 0.01)
157+
158+
// Check that the funder account balance is eventually back near the previous value.
159+
require.Eventually(t, func() bool {
160+
require.NoError(t, metricsClient.Fetch(ctx))
161+
funderBalance = metricsClient.GetGaugeValues("doublezero_funder_account_balance_sol")
162+
require.NotNil(t, funderBalance)
163+
if funderBalance[0].Value > expectedFunderBalance-0.01 && funderBalance[0].Value < expectedFunderBalance+0.01 {
164+
return true
165+
}
166+
log.Debug("--> Waiting for funder account balance to be back near previous value", "account", funderPK, "expectedBalance", expectedFunderBalance, "currentBalance", funderBalance[0].Value)
167+
return false
168+
}, 60*time.Second, 5*time.Second)
94169
}
95170

96-
func drainFunds(t *testing.T, client *solanarpc.Client, from solana.PrivateKey, to solana.PublicKey, amount float64) {
171+
func drainFunds(t *testing.T, client *solanarpc.Client, from solana.PrivateKey, to solana.PublicKey, remainingBalanceSOL float64) float64 {
97172
t.Helper()
98173

99174
balanceSOL := getBalance(t, client, from.PublicKey())
100-
transferFunds(t, client, from, to, balanceSOL-amount, nil)
175+
transferFunds(t, client, from, to, balanceSOL-remainingBalanceSOL, nil)
176+
177+
return balanceSOL - remainingBalanceSOL
101178
}
102179

103180
func requireEventuallyFunded(t *testing.T, log *slog.Logger, client *solanarpc.Client, account solana.PublicKey, minBalanceSOL float64, name string) {
@@ -108,7 +185,7 @@ func requireEventuallyFunded(t *testing.T, log *slog.Logger, client *solanarpc.C
108185
require.NoError(t, err)
109186
balanceSOL := float64(balance.Value) / float64(solana.LAMPORTS_PER_SOL)
110187
if balanceSOL < minBalanceSOL {
111-
log.Debug("--> Waiting for %s to be funded", "name", name)
188+
log.Debug(fmt.Sprintf("--> Waiting for %s to be funded", name), "account", account, "minBalance", minBalanceSOL, "balance", balanceSOL)
112189
return false
113190
}
114191
return true

0 commit comments

Comments
 (0)