@@ -17,6 +17,8 @@ package target
17
17
import (
18
18
"hash"
19
19
"hash/fnv"
20
+ "sync"
21
+ "time"
20
22
21
23
"github.com/go-logr/logr"
22
24
"github.com/prometheus/client_golang/prometheus"
@@ -27,6 +29,7 @@ import (
27
29
"github.com/prometheus/prometheus/discovery/targetgroup"
28
30
"github.com/prometheus/prometheus/model/labels"
29
31
"github.com/prometheus/prometheus/model/relabel"
32
+ "go.uber.org/zap/zapcore"
30
33
"gopkg.in/yaml.v3"
31
34
32
35
allocatorWatcher "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/watcher"
@@ -37,16 +40,33 @@ var (
37
40
Name : "opentelemetry_allocator_targets" ,
38
41
Help : "Number of targets discovered." ,
39
42
}, []string {"job_name" })
43
+
44
+ processTargetsDuration = promauto .NewHistogram (prometheus.HistogramOpts {
45
+ Name : "opentelemetry_allocator_process_targets_duration_seconds" ,
46
+ Help : "Duration of processing targets." ,
47
+ Buckets : []float64 {1 , 5 , 10 , 30 , 60 , 120 },
48
+ })
49
+
50
+ processTargetGroupsDuration = promauto .NewHistogramVec (prometheus.HistogramOpts {
51
+ Name : "opentelemetry_allocator_process_target_groups_duration_seconds" ,
52
+ Help : "Duration of processing target groups." ,
53
+ Buckets : []float64 {1 , 5 , 10 , 30 , 60 , 120 },
54
+ }, []string {"job_name" })
40
55
)
41
56
42
57
type Discoverer struct {
43
- log logr.Logger
44
- manager * discovery.Manager
45
- close chan struct {}
46
- configsMap map [allocatorWatcher.EventSource ][]* promconfig.ScrapeConfig
47
- hook discoveryHook
48
- scrapeConfigsHash hash.Hash
49
- scrapeConfigsUpdater scrapeConfigsUpdater
58
+ log logr.Logger
59
+ manager * discovery.Manager
60
+ close chan struct {}
61
+ mtxScrape sync.Mutex // Guards the fields below.
62
+ configsMap map [allocatorWatcher.EventSource ][]* promconfig.ScrapeConfig
63
+ hook discoveryHook
64
+ scrapeConfigsHash hash.Hash
65
+ scrapeConfigsUpdater scrapeConfigsUpdater
66
+ targetSets map [string ][]* targetgroup.Group
67
+ triggerReload chan struct {}
68
+ processTargetsCallBack func (targets map [string ]* Item )
69
+ mtxTargets sync.Mutex
50
70
}
51
71
52
72
type discoveryHook interface {
@@ -57,15 +77,17 @@ type scrapeConfigsUpdater interface {
57
77
UpdateScrapeConfigResponse (map [string ]* promconfig.ScrapeConfig ) error
58
78
}
59
79
60
- func NewDiscoverer (log logr.Logger , manager * discovery.Manager , hook discoveryHook , scrapeConfigsUpdater scrapeConfigsUpdater ) * Discoverer {
80
+ func NewDiscoverer (log logr.Logger , manager * discovery.Manager , hook discoveryHook , scrapeConfigsUpdater scrapeConfigsUpdater , setTargets func ( targets map [ string ] * Item ) ) * Discoverer {
61
81
return & Discoverer {
62
- log : log ,
63
- manager : manager ,
64
- close : make (chan struct {}),
65
- configsMap : make (map [allocatorWatcher.EventSource ][]* promconfig.ScrapeConfig ),
66
- hook : hook ,
67
- scrapeConfigsHash : nil , // we want the first update to succeed even if the config is empty
68
- scrapeConfigsUpdater : scrapeConfigsUpdater ,
82
+ log : log ,
83
+ manager : manager ,
84
+ close : make (chan struct {}),
85
+ triggerReload : make (chan struct {}, 1 ),
86
+ configsMap : make (map [allocatorWatcher.EventSource ][]* promconfig.ScrapeConfig ),
87
+ hook : hook ,
88
+ scrapeConfigsHash : nil , // we want the first update to succeed even if the config is empty
89
+ scrapeConfigsUpdater : scrapeConfigsUpdater ,
90
+ processTargetsCallBack : setTargets ,
69
91
}
70
92
}
71
93
@@ -105,43 +127,122 @@ func (m *Discoverer) ApplyConfig(source allocatorWatcher.EventSource, scrapeConf
105
127
return m .manager .ApplyConfig (discoveryCfg )
106
128
}
107
129
108
- func (m * Discoverer ) Watch (fn func (targets map [string ]* Item )) error {
109
- labelsBuilder := labels .NewBuilder (labels .EmptyLabels ())
130
+ func (m * Discoverer ) Run () error {
131
+ err := m .run (m .manager .SyncCh ())
132
+ if err != nil {
133
+ m .log .Error (err , "Service Discovery watch event failed" )
134
+ return err
135
+ }
136
+ <- m .close
137
+ m .log .Info ("Service Discovery watch event stopped: discovery manager closed" )
138
+ return nil
139
+ }
140
+
141
+ // UpdateTsets updates the target sets to be scraped.
142
+ func (m * Discoverer ) UpdateTsets (tsets map [string ][]* targetgroup.Group ) {
143
+ m .mtxScrape .Lock ()
144
+ m .targetSets = tsets
145
+ m .mtxScrape .Unlock ()
146
+ }
147
+
148
+ // reloader triggers a reload of the scrape configs at regular intervals.
149
+ // The time between reloads is defined by reloadIntervalDuration to avoid overloading the system
150
+ // with too many reloads, because some service discovery mechanisms can be quite chatty.
151
+ func (m * Discoverer ) reloader () {
152
+ reloadIntervalDuration := model .Duration (5 * time .Second )
153
+ ticker := time .NewTicker (time .Duration (reloadIntervalDuration ))
154
+
155
+ defer ticker .Stop ()
156
+
110
157
for {
111
158
select {
112
159
case <- m .close :
113
- m .log .Info ("Service Discovery watch event stopped: discovery manager closed" )
114
- return nil
115
- case tsets := <- m .manager .SyncCh ():
116
- m .ProcessTargets (labelsBuilder , tsets , fn )
160
+ return
161
+ case <- ticker .C :
162
+ select {
163
+ case <- m .triggerReload :
164
+ m .Reload ()
165
+ case <- m .close :
166
+ return
167
+ }
117
168
}
118
169
}
119
170
}
120
171
121
- func (m * Discoverer ) ProcessTargets (builder * labels.Builder , tsets map [string ][]* targetgroup.Group , fn func (targets map [string ]* Item )) {
172
+ // Reload triggers a reload of the scrape configs.
173
+ // This will process the target groups and update the targets concurrently.
174
+ func (m * Discoverer ) Reload () {
175
+ m .mtxScrape .Lock ()
176
+ var wg sync.WaitGroup
122
177
targets := map [string ]* Item {}
178
+ timer := prometheus .NewTimer (processTargetsDuration )
179
+ defer timer .ObserveDuration ()
180
+
181
+ for jobName , groups := range m .targetSets {
182
+ wg .Add (1 )
183
+ // Run the sync in parallel as these take a while and at high load can't catch up.
184
+ go func (jobName string , groups []* targetgroup.Group ) {
185
+ processedTargets := m .processTargetGroups (jobName , groups )
186
+ m .mtxTargets .Lock ()
187
+ for k , v := range processedTargets {
188
+ targets [k ] = v
189
+ }
190
+ m .mtxTargets .Unlock ()
191
+ wg .Done ()
192
+ }(jobName , groups )
193
+ }
194
+ m .mtxScrape .Unlock ()
195
+ wg .Wait ()
196
+ m .processTargetsCallBack (targets )
197
+ }
123
198
124
- for jobName , tgs := range tsets {
125
- var count float64 = 0
126
- for _ , tg := range tgs {
127
- builder .Reset (labels .EmptyLabels ())
128
- for ln , lv := range tg .Labels {
199
+ // processTargetGroups processes the target groups and returns a map of targets.
200
+ func (m * Discoverer ) processTargetGroups (jobName string , groups []* targetgroup.Group ) map [string ]* Item {
201
+ builder := labels .NewBuilder (labels.Labels {})
202
+ timer := prometheus .NewTimer (processTargetGroupsDuration .WithLabelValues (jobName ))
203
+ targets := map [string ]* Item {}
204
+ defer timer .ObserveDuration ()
205
+ var count float64 = 0
206
+ for _ , tg := range groups {
207
+ builder .Reset (labels .EmptyLabels ())
208
+ for ln , lv := range tg .Labels {
209
+ builder .Set (string (ln ), string (lv ))
210
+ }
211
+ groupLabels := builder .Labels ()
212
+ for _ , t := range tg .Targets {
213
+ count ++
214
+ builder .Reset (groupLabels )
215
+ for ln , lv := range t {
129
216
builder .Set (string (ln ), string (lv ))
130
217
}
131
- groupLabels := builder .Labels ()
132
- for _ , t := range tg .Targets {
133
- count ++
134
- builder .Reset (groupLabels )
135
- for ln , lv := range t {
136
- builder .Set (string (ln ), string (lv ))
137
- }
138
- item := NewItem (jobName , string (t [model .AddressLabel ]), builder .Labels (), "" )
139
- targets [item .Hash ()] = item
218
+ item := NewItem (jobName , string (t [model .AddressLabel ]), builder .Labels (), "" )
219
+ targets [item .Hash ()] = item
220
+ }
221
+ }
222
+ targetsDiscovered .WithLabelValues (jobName ).Set (count )
223
+ return targets
224
+ }
225
+
226
+ // Run receives and saves target set updates and triggers the scraping loops reloading.
227
+ // Reloading happens in the background so that it doesn't block receiving targets updates.
228
+ func (m * Discoverer ) run (tsets <- chan map [string ][]* targetgroup.Group ) error {
229
+ go m .reloader ()
230
+ for {
231
+ select {
232
+ case ts := <- tsets :
233
+ m .log .V (int (zapcore .DebugLevel )).Info ("Service Discovery watch event received" , "targets groups" , len (ts ))
234
+ m .UpdateTsets (ts )
235
+
236
+ select {
237
+ case m .triggerReload <- struct {}{}:
238
+ default :
140
239
}
240
+
241
+ case <- m .close :
242
+ m .log .Info ("Service Discovery watch event stopped: discovery manager closed" )
243
+ return nil
141
244
}
142
- targetsDiscovered .WithLabelValues (jobName ).Set (count )
143
245
}
144
- fn (targets )
145
246
}
146
247
147
248
func (m * Discoverer ) Close () {
0 commit comments