Skip to content

Commit fd56748

Browse files
authored
Merge pull request #115 from calvinrzachman/payment-collector
collectors: collect payment and attempt counts
2 parents 98a89e9 + 29b674f commit fd56748

File tree

6 files changed

+221
-12
lines changed

6 files changed

+221
-12
lines changed

.github/workflows/main.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ jobs:
2727
runs-on: ubuntu-latest
2828
steps:
2929
- name: git checkout
30-
uses: actions/checkout@v2
30+
uses: actions/checkout@v4
3131

3232
- name: go cache
33-
uses: actions/cache@v1
33+
uses: actions/cache@v4
3434
with:
3535
path: /home/runner/work/go
3636
key: lndmon-${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ github.job }}-${{ hashFiles('**/go.sum') }}
@@ -41,7 +41,7 @@ jobs:
4141
lndmon-${{ runner.os }}-go-
4242
4343
- name: setup go ${{ env.GO_VERSION }}
44-
uses: actions/setup-go@v2
44+
uses: actions/setup-go@v5
4545
with:
4646
go-version: '${{ env.GO_VERSION }}'
4747

collectors/log.go

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ var (
2020

2121
// htlcLogger is a logger for lndmon's htlc collector.
2222
htlcLogger = build.NewSubLogger("HTLC", backendLog.Logger)
23+
24+
// paymentLogger is a logger for lndmon's payments monitor.
25+
paymentLogger = build.NewSubLogger("PMNT", backendLog.Logger)
2326
)
2427

2528
// initLogRotator initializes the logging rotator to write logs to logFile and

collectors/payments_collector.go

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
package collectors
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"sync"
7+
8+
"github.com/lightninglabs/lndclient"
9+
"github.com/lightningnetwork/lnd/lnrpc"
10+
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
11+
"github.com/prometheus/client_golang/prometheus"
12+
)
13+
14+
var (
15+
// totalPayments tracks the total number of payments initiated, labeled
16+
// by final payment status. This permits computation of both throughput
17+
// and success/failure rates.
18+
totalPayments = prometheus.NewCounterVec(
19+
prometheus.CounterOpts{
20+
Name: "lnd_total_payments",
21+
Help: "Total number of payments initiated, labeled by final status",
22+
},
23+
[]string{"status"},
24+
)
25+
26+
// totalHTLCAttempts is a simple counter which, in combination with the
27+
// payment counter, permits tracking the number of attempts per payment.
28+
totalHTLCAttempts = prometheus.NewCounter(
29+
prometheus.CounterOpts{
30+
Name: "lnd_total_htlc_attempts",
31+
Help: "Total number of HTLC attempts across all payments",
32+
},
33+
)
34+
35+
// paymentAttempts is a histogram for visualizing what portion of
36+
// payments complete within a given number of attempts.
37+
paymentAttempts = prometheus.NewHistogram(
38+
prometheus.HistogramOpts{
39+
Name: "lnd_payment_attempts_per_payment",
40+
Help: "Histogram tracking the number of attempts per payment",
41+
Buckets: prometheus.ExponentialBuckets(1, 2, 10),
42+
},
43+
)
44+
)
45+
46+
// paymentsMonitor listens for payments and updates Prometheus metrics.
47+
type paymentsMonitor struct {
48+
client routerrpc.RouterClient
49+
50+
lnd *lndclient.LndServices
51+
52+
errChan chan error
53+
54+
// quit is closed to signal that we need to shutdown.
55+
quit chan struct{}
56+
57+
wg sync.WaitGroup
58+
}
59+
60+
// newPaymentsMonitor creates a new payments monitor and ensures the context
61+
// includes macaroon authentication.
62+
func newPaymentsMonitor(lnd *lndclient.LndServices,
63+
errChan chan error) *paymentsMonitor {
64+
65+
return &paymentsMonitor{
66+
client: routerrpc.NewRouterClient(lnd.ClientConn),
67+
lnd: lnd,
68+
errChan: errChan,
69+
quit: make(chan struct{}),
70+
}
71+
}
72+
73+
// start subscribes to `TrackPayments` and updates Prometheus metrics.
74+
func (p *paymentsMonitor) start() error {
75+
paymentLogger.Info("Starting payments monitor...")
76+
77+
// Attach macaroon authentication for the router service.
78+
ctx, cancel := context.WithCancel(context.Background())
79+
ctx, err := p.lnd.WithMacaroonAuthForService(
80+
ctx, lndclient.RouterServiceMac,
81+
)
82+
if err != nil {
83+
cancel()
84+
85+
return fmt.Errorf("failed to get macaroon-authenticated "+
86+
"context: %w", err)
87+
}
88+
89+
stream, err := p.client.TrackPayments(
90+
ctx, &routerrpc.TrackPaymentsRequest{
91+
// NOTE: We only need to know the final result of the
92+
// payment and all attempts.
93+
NoInflightUpdates: true,
94+
},
95+
)
96+
if err != nil {
97+
paymentLogger.Errorf("Failed to subscribe to TrackPayments: %v",
98+
err)
99+
100+
cancel()
101+
102+
return err
103+
}
104+
105+
p.wg.Add(1)
106+
go func() {
107+
defer func() {
108+
cancel()
109+
p.wg.Done()
110+
}()
111+
112+
for {
113+
select {
114+
case <-p.quit:
115+
return
116+
117+
default:
118+
payment, err := stream.Recv()
119+
if err != nil {
120+
paymentLogger.Errorf("Error receiving "+
121+
"payment update: %v", err)
122+
123+
p.errChan <- err
124+
return
125+
}
126+
processPaymentUpdate(payment)
127+
}
128+
}
129+
}()
130+
131+
return nil
132+
}
133+
134+
// stop cancels the payments monitor subscription.
135+
func (p *paymentsMonitor) stop() {
136+
paymentLogger.Info("Stopping payments monitor...")
137+
138+
close(p.quit)
139+
p.wg.Wait()
140+
}
141+
142+
// collectors returns all of the collectors that the htlc monitor uses.
143+
func (p *paymentsMonitor) collectors() []prometheus.Collector {
144+
return []prometheus.Collector{
145+
totalPayments, totalHTLCAttempts, paymentAttempts,
146+
}
147+
}
148+
149+
// processPaymentUpdate updates Prometheus metrics based on received payments.
150+
//
151+
// NOTE: It is expected that this receive the *final* payment update with the
152+
// complete list of all htlc attempts made for this payment.
153+
func processPaymentUpdate(payment *lnrpc.Payment) {
154+
var status string
155+
156+
switch payment.Status {
157+
case lnrpc.Payment_SUCCEEDED:
158+
status = "succeeded"
159+
case lnrpc.Payment_FAILED:
160+
status = "failed"
161+
default:
162+
// We don't expect this given that this should be a terminal
163+
// payment update.
164+
status = "unknown"
165+
}
166+
167+
totalPayments.WithLabelValues(status).Inc()
168+
attemptCount := len(payment.Htlcs)
169+
170+
totalHTLCAttempts.Add(float64(attemptCount))
171+
paymentAttempts.Observe(float64(attemptCount))
172+
173+
paymentLogger.Debugf("Payment %s updated: status=%s, %d attempts",
174+
payment.PaymentHash, status, attemptCount)
175+
}

collectors/prometheus.go

+34-7
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ type PrometheusExporter struct {
3232

3333
monitoringCfg *MonitoringConfig
3434

35-
htlcMonitor *htlcMonitor
35+
htlcMonitor *htlcMonitor
36+
paymentsMonitor *paymentsMonitor
3637

3738
// collectors is the exporter's active set of collectors.
3839
collectors []prometheus.Collector
@@ -72,6 +73,9 @@ type MonitoringConfig struct {
7273
// DisableHtlc disables collection of HTLCs metrics
7374
DisableHtlc bool
7475

76+
// DisablePayments disables collection of payment metrics
77+
DisablePayments bool
78+
7579
// ProgramStartTime stores a best-effort estimate of when lnd/lndmon was
7680
// started.
7781
ProgramStartTime time.Time
@@ -100,6 +104,9 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices,
100104

101105
htlcMonitor := newHtlcMonitor(lnd.Router, errChan)
102106

107+
// Create payments monitor.
108+
paymentsMonitor := newPaymentsMonitor(lnd, errChan)
109+
103110
chanCollector := NewChannelsCollector(
104111
lnd.Client, errChan, quitChan, monitoringCfg,
105112
)
@@ -117,19 +124,26 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices,
117124
collectors = append(collectors, htlcMonitor.collectors()...)
118125
}
119126

127+
if !monitoringCfg.DisablePayments {
128+
collectors = append(
129+
collectors, paymentsMonitor.collectors()...,
130+
)
131+
}
132+
120133
if !monitoringCfg.DisableGraph {
121134
collectors = append(
122135
collectors, NewGraphCollector(lnd.Client, errChan),
123136
)
124137
}
125138

126139
return &PrometheusExporter{
127-
cfg: cfg,
128-
lnd: lnd,
129-
monitoringCfg: monitoringCfg,
130-
collectors: collectors,
131-
htlcMonitor: htlcMonitor,
132-
errChan: errChan,
140+
cfg: cfg,
141+
lnd: lnd,
142+
monitoringCfg: monitoringCfg,
143+
collectors: collectors,
144+
htlcMonitor: htlcMonitor,
145+
paymentsMonitor: paymentsMonitor,
146+
errChan: errChan,
133147
}
134148
}
135149

@@ -165,6 +179,15 @@ func (p *PrometheusExporter) Start() error {
165179
}
166180
}
167181

182+
// Start the payment monitor goroutine. This will subscribe to receive
183+
// update for all payments made by lnd and update our payments related
184+
// metrics.
185+
if !p.monitoringCfg.DisablePayments {
186+
if err := p.paymentsMonitor.start(); err != nil {
187+
return err
188+
}
189+
}
190+
168191
// Finally, we'll launch the HTTP server that Prometheus will use to
169192
// scape our metrics.
170193
go func() {
@@ -199,6 +222,10 @@ func (p *PrometheusExporter) Stop() {
199222
if !p.monitoringCfg.DisableHtlc {
200223
p.htlcMonitor.stop()
201224
}
225+
226+
if !p.monitoringCfg.DisablePayments {
227+
p.paymentsMonitor.stop()
228+
}
202229
}
203230

204231
// Errors returns an error channel that any failures experienced by its

config.go

+3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ type config struct {
5454

5555
// DisableHtlc disables the collection of HTLCs metrics.
5656
DisableHtlc bool `long:"disablehtlc" description:"Do not collect HTLCs metrics"`
57+
58+
// DisablePayments disables the collection of payments metrics.
59+
DisablePayments bool `long:"disablepayments" description:"Do not collect payments metrics"`
5760
}
5861

5962
var defaultConfig = config{

lndmon.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ func start() error {
6363
defer lnd.Close()
6464

6565
monitoringCfg := collectors.MonitoringConfig{
66-
DisableGraph: cfg.DisableGraph,
67-
DisableHtlc: cfg.DisableHtlc,
66+
DisableGraph: cfg.DisableGraph,
67+
DisableHtlc: cfg.DisableHtlc,
68+
DisablePayments: cfg.DisablePayments,
6869
}
6970
if cfg.PrimaryNode != "" {
7071
primaryNode, err := route.NewVertexFromStr(cfg.PrimaryNode)

0 commit comments

Comments
 (0)