Skip to content

Commit 82fbd65

Browse files
collectors: collect payment and attempt counts
collect raw payment and attempt count information from lnd using the TrackPayments RPC.
1 parent 98a89e9 commit 82fbd65

File tree

5 files changed

+224
-9
lines changed

5 files changed

+224
-9
lines changed

collectors/log.go

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ var (
2020

2121
// htlcLogger is a logger for lndmon's htlc collector.
2222
htlcLogger = build.NewSubLogger("HTLC", backendLog.Logger)
23+
24+
// paymentLogger is a logger for lndmon's payments monitor.
25+
paymentLogger = build.NewSubLogger("PMNT", backendLog.Logger)
2326
)
2427

2528
// initLogRotator initializes the logging rotator to write logs to logFile and

collectors/payments_collector.go

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
package collectors
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"sync"
7+
8+
"github.com/lightninglabs/lndclient"
9+
"github.com/lightningnetwork/lnd/lnrpc"
10+
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
11+
"github.com/prometheus/client_golang/prometheus"
12+
)
13+
14+
var (
15+
// totalPayments tracks the total number of payments initiated, labeled
16+
// by final payment status. This permits computation of both throughput
17+
// and success/failure rates.
18+
totalPayments = prometheus.NewCounterVec(
19+
prometheus.CounterOpts{
20+
Name: "lnd_total_payments",
21+
Help: "Total number of payments initiated, labeled by final status",
22+
},
23+
[]string{"status"},
24+
)
25+
26+
// totalHTLCAttempts is a simple counter which, in combination with the
27+
// payment counter, permits tracking the number of attempts per payment.
28+
totalHTLCAttempts = prometheus.NewCounter(
29+
prometheus.CounterOpts{
30+
Name: "lnd_total_htlc_attempts",
31+
Help: "Total number of HTLC attempts across all payments",
32+
},
33+
)
34+
35+
// paymentAttempts is a histogram for visualizing what portion of
36+
// payments complete within a given number of attempts.
37+
paymentAttempts = prometheus.NewHistogram(
38+
prometheus.HistogramOpts{
39+
Name: "lnd_payment_attempts_per_payment",
40+
Help: "Histogram tracking the number of attempts per payment",
41+
Buckets: prometheus.ExponentialBucketsRange(1, 2, 10),
42+
},
43+
)
44+
)
45+
46+
// paymentsMonitor listens for payments and updates Prometheus metrics.
47+
type paymentsMonitor struct {
48+
client routerrpc.RouterClient
49+
50+
lnd *lndclient.LndServices
51+
52+
errChan chan error
53+
54+
// quit is closed to signal that we need to shutdown.
55+
quit chan struct{}
56+
57+
wg sync.WaitGroup
58+
}
59+
60+
// newPaymentsMonitor creates a new payments monitor and ensures the context
61+
// includes macaroon authentication.
62+
func newPaymentsMonitor(lnd *lndclient.LndServices,
63+
errChan chan error) (*paymentsMonitor, error) {
64+
65+
return &paymentsMonitor{
66+
client: routerrpc.NewRouterClient(lnd.ClientConn),
67+
lnd: lnd,
68+
errChan: errChan,
69+
quit: make(chan struct{}),
70+
}, nil
71+
}
72+
73+
// start subscribes to `TrackPayments` and updates Prometheus metrics.
74+
func (p *paymentsMonitor) start() error {
75+
paymentLogger.Info("Starting payments monitor...")
76+
77+
// Attach macaroon authentication for the router service.
78+
ctx, cancel := context.WithCancel(context.Background())
79+
ctx, err := p.lnd.WithMacaroonAuthForService(
80+
ctx, lndclient.RouterServiceMac,
81+
)
82+
if err != nil {
83+
cancel()
84+
85+
return fmt.Errorf("failed to get macaroon-authenticated "+
86+
"context: %w", err)
87+
}
88+
89+
stream, err := p.client.TrackPayments(
90+
ctx, &routerrpc.TrackPaymentsRequest{
91+
// NOTE: We only need to know the final result of the
92+
// payment and all attempts.
93+
NoInflightUpdates: true,
94+
},
95+
)
96+
if err != nil {
97+
paymentLogger.Errorf("Failed to subscribe to TrackPayments: %v",
98+
err)
99+
100+
cancel()
101+
102+
return err
103+
}
104+
105+
p.wg.Add(1)
106+
go func() {
107+
defer func() {
108+
cancel()
109+
p.wg.Done()
110+
}()
111+
112+
for {
113+
select {
114+
case <-p.quit:
115+
return
116+
117+
default:
118+
payment, err := stream.Recv()
119+
if err != nil {
120+
paymentLogger.Errorf("Error receiving "+
121+
"payment update: %v", err)
122+
123+
p.errChan <- err
124+
return
125+
}
126+
processPaymentUpdate(payment)
127+
}
128+
}
129+
}()
130+
131+
return nil
132+
}
133+
134+
// stop cancels the payments monitor subscription.
135+
func (p *paymentsMonitor) stop() {
136+
paymentLogger.Info("Stopping payments monitor...")
137+
138+
close(p.quit)
139+
p.wg.Wait()
140+
}
141+
142+
// collectors returns all of the collectors that the htlc monitor uses.
143+
func (p *paymentsMonitor) collectors() []prometheus.Collector {
144+
return []prometheus.Collector{
145+
totalPayments, totalHTLCAttempts, paymentAttempts,
146+
}
147+
}
148+
149+
// processPaymentUpdate updates Prometheus metrics based on received payments.
150+
//
151+
// NOTE: It is expected that this receive the *final* payment update with the
152+
// complete list of all htlc attempts made for this payment.
153+
func processPaymentUpdate(payment *lnrpc.Payment) {
154+
var status string
155+
156+
switch payment.Status {
157+
case lnrpc.Payment_SUCCEEDED:
158+
status = "succeeded"
159+
case lnrpc.Payment_FAILED:
160+
status = "failed"
161+
default:
162+
// We don't expect this given that this should be a terminal
163+
// payment update.
164+
status = "unknown"
165+
}
166+
167+
totalPayments.WithLabelValues(status).Inc()
168+
attemptCount := len(payment.Htlcs)
169+
170+
totalHTLCAttempts.Add(float64(attemptCount))
171+
paymentAttempts.Observe(float64(attemptCount))
172+
173+
paymentLogger.Debugf("Payment %s updated: status=%s, %d attempts",
174+
payment.PaymentHash, status, attemptCount)
175+
}

collectors/prometheus.go

+40-7
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ type PrometheusExporter struct {
3232

3333
monitoringCfg *MonitoringConfig
3434

35-
htlcMonitor *htlcMonitor
35+
htlcMonitor *htlcMonitor
36+
paymentsMonitor *paymentsMonitor
3637

3738
// collectors is the exporter's active set of collectors.
3839
collectors []prometheus.Collector
@@ -72,6 +73,9 @@ type MonitoringConfig struct {
7273
// DisableHtlc disables collection of HTLCs metrics
7374
DisableHtlc bool
7475

76+
// DisablePayments disables collection of payment metrics
77+
DisablePayments bool
78+
7579
// ProgramStartTime stores a best-effort estimate of when lnd/lndmon was
7680
// started.
7781
ProgramStartTime time.Time
@@ -100,6 +104,15 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices,
100104

101105
htlcMonitor := newHtlcMonitor(lnd.Router, errChan)
102106

107+
// Create payments monitor.
108+
paymentsMonitor, err := newPaymentsMonitor(lnd, errChan)
109+
if err != nil {
110+
paymentLogger.Errorf("Failed to initialize payment monitor: %v",
111+
err)
112+
113+
return nil
114+
}
115+
103116
chanCollector := NewChannelsCollector(
104117
lnd.Client, errChan, quitChan, monitoringCfg,
105118
)
@@ -117,19 +130,26 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices,
117130
collectors = append(collectors, htlcMonitor.collectors()...)
118131
}
119132

133+
if !monitoringCfg.DisablePayments {
134+
collectors = append(
135+
collectors, paymentsMonitor.collectors()...,
136+
)
137+
}
138+
120139
if !monitoringCfg.DisableGraph {
121140
collectors = append(
122141
collectors, NewGraphCollector(lnd.Client, errChan),
123142
)
124143
}
125144

126145
return &PrometheusExporter{
127-
cfg: cfg,
128-
lnd: lnd,
129-
monitoringCfg: monitoringCfg,
130-
collectors: collectors,
131-
htlcMonitor: htlcMonitor,
132-
errChan: errChan,
146+
cfg: cfg,
147+
lnd: lnd,
148+
monitoringCfg: monitoringCfg,
149+
collectors: collectors,
150+
htlcMonitor: htlcMonitor,
151+
paymentsMonitor: paymentsMonitor,
152+
errChan: errChan,
133153
}
134154
}
135155

@@ -165,6 +185,15 @@ func (p *PrometheusExporter) Start() error {
165185
}
166186
}
167187

188+
// Start the payment monitor goroutine. This will subscribe to receive
189+
// update for all payments made by lnd and update our payments related
190+
// metrics.
191+
if !p.monitoringCfg.DisablePayments {
192+
if err := p.paymentsMonitor.start(); err != nil {
193+
return err
194+
}
195+
}
196+
168197
// Finally, we'll launch the HTTP server that Prometheus will use to
169198
// scape our metrics.
170199
go func() {
@@ -199,6 +228,10 @@ func (p *PrometheusExporter) Stop() {
199228
if !p.monitoringCfg.DisableHtlc {
200229
p.htlcMonitor.stop()
201230
}
231+
232+
if !p.monitoringCfg.DisablePayments {
233+
p.paymentsMonitor.stop()
234+
}
202235
}
203236

204237
// Errors returns an error channel that any failures experienced by its

config.go

+3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ type config struct {
5454

5555
// DisableHtlc disables the collection of HTLCs metrics.
5656
DisableHtlc bool `long:"disablehtlc" description:"Do not collect HTLCs metrics"`
57+
58+
// DisablePayments disables the collection of payments metrics.
59+
DisablePayments bool `long:"disablepayments" description:"Do not collect payments metrics"`
5760
}
5861

5962
var defaultConfig = config{

lndmon.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ func start() error {
6363
defer lnd.Close()
6464

6565
monitoringCfg := collectors.MonitoringConfig{
66-
DisableGraph: cfg.DisableGraph,
67-
DisableHtlc: cfg.DisableHtlc,
66+
DisableGraph: cfg.DisableGraph,
67+
DisableHtlc: cfg.DisableHtlc,
68+
DisablePayments: cfg.DisablePayments,
6869
}
6970
if cfg.PrimaryNode != "" {
7071
primaryNode, err := route.NewVertexFromStr(cfg.PrimaryNode)

0 commit comments

Comments
 (0)