Skip to content

Commit ce1169f

Browse files
Tracing: Support remote, rate-limited, and probabilistic sampling in tracing.opentelemetry config section (grafana#73587)
* tracing: Support remote sampling server Signed-off-by: Dave Henderson <[email protected]> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <[email protected]> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <[email protected]> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <[email protected]> * Update docs/sources/setup-grafana/configure-grafana/_index.md Co-authored-by: Christopher Moyer <[email protected]> * Update docs/sources/setup-grafana/configure-grafana/_index.md * Update docs/sources/setup-grafana/configure-grafana/_index.md * Update docs/sources/setup-grafana/configure-grafana/_index.md * Satisfying the doc-validator check * satisfy prettier Signed-off-by: Dave Henderson <[email protected]> * back out unnecessary change Signed-off-by: Dave Henderson <[email protected]> --------- Signed-off-by: Dave Henderson <[email protected]> Co-authored-by: Christopher Moyer <[email protected]>
1 parent 951876b commit ce1169f

File tree

6 files changed

+190
-31
lines changed

6 files changed

+190
-31
lines changed

conf/defaults.ini

+13-1
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,18 @@ disable_shared_zipkin_spans = false
13741374

13751375
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
13761376
custom_attributes =
1377+
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
1378+
sampler_type =
1379+
# Sampler configuration parameter
1380+
# for "const" sampler, 0 or 1 for always false/true respectively
1381+
# for "probabilistic" sampler, a probability between 0.0 and 1.0
1382+
# for "rateLimiting" sampler, the number of spans per second
1383+
# for "remote" sampler, param is the same as for "probabilistic"
1384+
# and indicates the initial sampling rate before the actual one
1385+
# is received from the sampling server (set at sampling_server_url)
1386+
sampler_param =
1387+
# specifies the URL of the sampling server when sampler_type is remote
1388+
sampling_server_url =
13771389

13781390
[tracing.opentelemetry.jaeger]
13791391
# jaeger destination (ex http://localhost:14268/api/traces)
@@ -1668,4 +1680,4 @@ update_controller_url =
16681680
hidden_toggles =
16691681

16701682
# Disables updating specific feature toggles in the feature management page
1671-
read_only_toggles =
1683+
read_only_toggles =

conf/sample.ini

+13-1
Original file line numberDiff line numberDiff line change
@@ -1272,6 +1272,18 @@
12721272
[tracing.opentelemetry]
12731273
# attributes that will always be included when creating new spans. ex (key1:value1,key2:value2)
12741274
;custom_attributes = key1:value1,key2:value2
1275+
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
1276+
; sampler_type = remote
1277+
# Sampler configuration parameter
1278+
# for "const" sampler, 0 or 1 for always false/true respectively
1279+
# for "probabilistic" sampler, a probability between 0.0 and 1.0
1280+
# for "rateLimiting" sampler, the number of spans per second
1281+
# for "remote" sampler, param is the same as for "probabilistic"
1282+
# and indicates the initial sampling rate before the actual one
1283+
# is received from the sampling server (set at sampling_server_url)
1284+
; sampler_param = 0.5
1285+
# specifies the URL of the sampling server when sampler_type is remote
1286+
; sampling_server_url = http://localhost:5778/sampling
12751287

12761288
[tracing.opentelemetry.jaeger]
12771289
# jaeger destination (ex http://localhost:14268/api/traces)
@@ -1535,4 +1547,4 @@
15351547
# Hide specific feature toggles from the feature management page
15361548
;hidden_toggles =
15371549
# Disable updating specific feature toggles in the feature management page
1538-
;read_only_toggles =
1550+
;read_only_toggles =

docs/sources/setup-grafana/configure-grafana/_index.md

+31
Original file line numberDiff line numberDiff line change
@@ -1801,6 +1801,8 @@ Refer to https://www.jaegertracing.io/docs/1.16/sampling/#client-sampling-config
18011801

18021802
Can be set with the environment variable `JAEGER_SAMPLER_TYPE`.
18031803

1804+
_To override this setting, enter `sampler_type` in the `tracing.opentelemetry` section._
1805+
18041806
### sampler_param
18051807

18061808
Default value is `1`.
@@ -1816,10 +1818,14 @@ This is the sampler configuration parameter. Depending on the value of `sampler_
18161818

18171819
May be set with the environment variable `JAEGER_SAMPLER_PARAM`.
18181820

1821+
_Setting `sampler_param` in the `tracing.opentelemetry` section will override this setting._
1822+
18191823
### sampling_server_url
18201824

18211825
sampling_server_url is the URL of a sampling manager providing a sampling strategy.
18221826

1827+
_Setting `sampling_server_url` in the `tracing.opentelemetry` section will override this setting._
1828+
18231829
### zipkin_propagation
18241830

18251831
Default value is `false`.
@@ -1846,6 +1852,31 @@ Comma-separated list of attributes to include in all new spans, such as `key1:va
18461852

18471853
Can be set with the environment variable `OTEL_RESOURCE_ATTRIBUTES` (use `=` instead of `:` with the environment variable).
18481854

1855+
### sampler_type
1856+
1857+
Default value is `const`.
1858+
1859+
Specifies the type of sampler: `const`, `probabilistic`, `rateLimiting`, or `remote`.
1860+
1861+
### sampler_param
1862+
1863+
Default value is `1`.
1864+
1865+
Depending on the value of `sampler_type`, the sampler configuration parameter can be `0`, `1`, a decimal value between `0` and `1`, or a number of spans per second.
1866+
1867+
- For the `const` sampler, use `0` to never sample or `1` to always sample
1868+
- For the `probabilistic` sampler, you can use a decimal value between `0.0` and `1.0`
1869+
- For the `rateLimiting` sampler, enter the number of spans per second
1870+
- For the `remote` sampler, use a decimal value between `0.0` and `1.0`
1871+
to specify the initial sampling rate used before the first update
1872+
is received from the sampling server
1873+
1874+
### sampling_server_url
1875+
1876+
When `sampler_type` is `remote`, this specifies the URL of the sampling server. This can be used by all tracing providers.
1877+
1878+
Use a sampling server that supports the Jaeger remote sampling API, such as jaeger-agent, jaeger-collector, opentelemetry-collector-contrib, or [Grafana Agent](/oss/agent/).
1879+
18491880
<hr>
18501881

18511882
## [tracing.opentelemetry.jaeger]

pkg/infra/tracing/test_helper.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ import (
77
"go.opentelemetry.io/otel"
88
"go.opentelemetry.io/otel/attribute"
99
"go.opentelemetry.io/otel/codes"
10+
tracesdk "go.opentelemetry.io/otel/sdk/trace"
1011
"go.opentelemetry.io/otel/sdk/trace/tracetest"
1112
"go.opentelemetry.io/otel/trace"
1213
)
1314

1415
func InitializeTracerForTest() Tracer {
1516
exp := tracetest.NewInMemoryExporter()
16-
tp, _ := initTracerProvider(exp, "testing")
17+
tp, _ := initTracerProvider(exp, "testing", tracesdk.AlwaysSample())
1718
otel.SetTracerProvider(tp)
1819

1920
ots := &Opentelemetry{Propagation: "jaeger,w3c", tracerProvider: tp}

pkg/infra/tracing/tracing.go

+63-25
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,22 @@ func (ots *Opentelemetry) parseSettings() error {
224224
return err
225225
}
226226

227+
// if sampler_type is set in tracing.opentelemetry, we ignore the config in tracing.jaeger
228+
sampler := section.Key("sampler_type").MustString("")
229+
if sampler != "" {
230+
ots.sampler = sampler
231+
}
232+
233+
samplerParam := section.Key("sampler_param").MustFloat64(0)
234+
if samplerParam != 0 {
235+
ots.samplerParam = samplerParam
236+
}
237+
238+
samplerRemoteURL := section.Key("sampling_server_url").MustString("")
239+
if samplerRemoteURL != "" {
240+
ots.samplerRemoteURL = samplerRemoteURL
241+
}
242+
227243
section = ots.Cfg.Raw.Section("tracing.opentelemetry.jaeger")
228244
ots.enabled = noopExporter
229245

@@ -295,16 +311,9 @@ func (ots *Opentelemetry) initJaegerTracerProvider() (*tracesdk.TracerProvider,
295311
return nil, err
296312
}
297313

298-
sampler := tracesdk.AlwaysSample()
299-
if ots.sampler == "const" || ots.sampler == "probabilistic" {
300-
sampler = tracesdk.TraceIDRatioBased(ots.samplerParam)
301-
} else if ots.sampler == "rateLimiting" {
302-
sampler = newRateLimiter(ots.samplerParam)
303-
} else if ots.sampler == "remote" {
304-
sampler = jaegerremote.New("grafana", jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
305-
jaegerremote.WithInitialSampler(tracesdk.TraceIDRatioBased(ots.samplerParam)))
306-
} else if ots.sampler != "" {
307-
return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
314+
sampler, err := ots.initSampler()
315+
if err != nil {
316+
return nil, err
308317
}
309318

310319
tp := tracesdk.NewTracerProvider(
@@ -323,10 +332,39 @@ func (ots *Opentelemetry) initOTLPTracerProvider() (*tracesdk.TracerProvider, er
323332
return nil, err
324333
}
325334

326-
return initTracerProvider(exp, ots.Cfg.BuildVersion, ots.customAttribs...)
335+
sampler, err := ots.initSampler()
336+
if err != nil {
337+
return nil, err
338+
}
339+
340+
return initTracerProvider(exp, ots.Cfg.BuildVersion, sampler, ots.customAttribs...)
341+
}
342+
343+
func (ots *Opentelemetry) initSampler() (tracesdk.Sampler, error) {
344+
switch ots.sampler {
345+
case "const", "":
346+
if ots.samplerParam >= 1 {
347+
return tracesdk.AlwaysSample(), nil
348+
} else if ots.samplerParam <= 0 {
349+
return tracesdk.NeverSample(), nil
350+
}
351+
352+
return nil, fmt.Errorf("invalid param for const sampler - must be 0 or 1: %f", ots.samplerParam)
353+
case "probabilistic":
354+
return tracesdk.TraceIDRatioBased(ots.samplerParam), nil
355+
case "rateLimiting":
356+
return newRateLimiter(ots.samplerParam), nil
357+
case "remote":
358+
return jaegerremote.New("grafana",
359+
jaegerremote.WithSamplingServerURL(ots.samplerRemoteURL),
360+
jaegerremote.WithInitialSampler(tracesdk.TraceIDRatioBased(ots.samplerParam)),
361+
), nil
362+
default:
363+
return nil, fmt.Errorf("invalid sampler type: %s", ots.sampler)
364+
}
327365
}
328366

329-
func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
367+
func initTracerProvider(exp tracesdk.SpanExporter, version string, sampler tracesdk.Sampler, customAttribs ...attribute.KeyValue) (*tracesdk.TracerProvider, error) {
330368
res, err := resource.New(
331369
context.Background(),
332370
resource.WithAttributes(
@@ -343,9 +381,7 @@ func initTracerProvider(exp tracesdk.SpanExporter, version string, customAttribs
343381

344382
tp := tracesdk.NewTracerProvider(
345383
tracesdk.WithBatcher(exp),
346-
tracesdk.WithSampler(tracesdk.ParentBased(
347-
tracesdk.AlwaysSample(),
348-
)),
384+
tracesdk.WithSampler(tracesdk.ParentBased(sampler)),
349385
tracesdk.WithResource(res),
350386
)
351387
return tp, nil
@@ -501,21 +537,23 @@ func (s OpentelemetrySpan) ContextWithSpan(ctx context.Context) context.Context
501537

502538
type rateLimiter struct {
503539
sync.Mutex
504-
rps float64
505-
balance float64
506-
maxBalance float64
507-
lastTick time.Time
540+
description string
541+
rps float64
542+
balance float64
543+
maxBalance float64
544+
lastTick time.Time
508545

509546
now func() time.Time
510547
}
511548

512549
func newRateLimiter(rps float64) *rateLimiter {
513550
return &rateLimiter{
514-
rps: rps,
515-
balance: math.Max(rps, 1),
516-
maxBalance: math.Max(rps, 1),
517-
lastTick: time.Now(),
518-
now: time.Now,
551+
rps: rps,
552+
description: fmt.Sprintf("RateLimitingSampler{%g}", rps),
553+
balance: math.Max(rps, 1),
554+
maxBalance: math.Max(rps, 1),
555+
lastTick: time.Now(),
556+
now: time.Now,
519557
}
520558
}
521559

@@ -538,4 +576,4 @@ func (rl *rateLimiter) ShouldSample(p tracesdk.SamplingParameters) tracesdk.Samp
538576
return tracesdk.SamplingResult{Decision: tracesdk.Drop, Tracestate: psc.TraceState()}
539577
}
540578

541-
func (rl *rateLimiter) Description() string { return "RateLimitingSampler" }
579+
func (rl *rateLimiter) Description() string { return rl.description }

pkg/infra/tracing/tracing_test.go

+68-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"testing"
55

66
"github.com/stretchr/testify/assert"
7+
"github.com/stretchr/testify/require"
78
"go.opentelemetry.io/otel/attribute"
89

910
"github.com/grafana/grafana/pkg/setting"
@@ -63,6 +64,10 @@ func TestTracingConfig(t *testing.T) {
6364
ExpectedAddress string
6465
ExpectedPropagator string
6566
ExpectedAttrs []attribute.KeyValue
67+
68+
ExpectedSampler string
69+
ExpectedSamplerParam float64
70+
ExpectedSamplingServerURL string
6671
}{
6772
{
6873
Name: "default config uses noop exporter",
@@ -126,14 +131,34 @@ func TestTracingConfig(t *testing.T) {
126131
[tracing.jaeger]
127132
address = foo.com:6831
128133
custom_tags = a:b
134+
sampler_param = 0
129135
[tracing.opentelemetry]
130136
custom_attributes = c:d
137+
sampler_param = 1
131138
[tracing.opentelemetry.jaeger]
132139
address = bar.com:6831
133140
`,
134-
ExpectedExporter: jaegerExporter,
135-
ExpectedAddress: "bar.com:6831",
136-
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
141+
ExpectedExporter: jaegerExporter,
142+
ExpectedAddress: "bar.com:6831",
143+
ExpectedAttrs: []attribute.KeyValue{attribute.String("c", "d")},
144+
ExpectedSamplerParam: 1.0,
145+
},
146+
{
147+
Name: "remote sampler config is parsed from otel config",
148+
Cfg: `
149+
[tracing.opentelemetry]
150+
sampler_type = remote
151+
sampler_param = 0.5
152+
sampling_server_url = http://example.com:5778/sampling
153+
[tracing.opentelemetry.otlp]
154+
address = otlp.example.com:4317
155+
`,
156+
ExpectedExporter: otlpExporter,
157+
ExpectedAddress: "otlp.example.com:4317",
158+
ExpectedAttrs: []attribute.KeyValue{},
159+
ExpectedSampler: "remote",
160+
ExpectedSamplerParam: 0.5,
161+
ExpectedSamplingServerURL: "http://example.com:5778/sampling",
137162
},
138163
} {
139164
t.Run(test.Name, func(t *testing.T) {
@@ -156,6 +181,46 @@ func TestTracingConfig(t *testing.T) {
156181
assert.Equal(t, test.ExpectedAddress, otel.Address)
157182
assert.Equal(t, test.ExpectedPropagator, otel.Propagation)
158183
assert.Equal(t, test.ExpectedAttrs, otel.customAttribs)
184+
185+
if test.ExpectedSampler != "" {
186+
assert.Equal(t, test.ExpectedSampler, otel.sampler)
187+
assert.Equal(t, test.ExpectedSamplerParam, otel.samplerParam)
188+
assert.Equal(t, test.ExpectedSamplingServerURL, otel.samplerRemoteURL)
189+
}
159190
})
160191
}
161192
}
193+
194+
func TestInitSampler(t *testing.T) {
195+
otel := &Opentelemetry{}
196+
sampler, err := otel.initSampler()
197+
require.NoError(t, err)
198+
assert.Equal(t, "AlwaysOffSampler", sampler.Description())
199+
200+
otel.sampler = "bogus"
201+
_, err = otel.initSampler()
202+
require.Error(t, err)
203+
204+
otel.sampler = "const"
205+
otel.samplerParam = 0.5
206+
_, err = otel.initSampler()
207+
require.Error(t, err)
208+
209+
otel.sampler = "const"
210+
otel.samplerParam = 1.0
211+
sampler, err = otel.initSampler()
212+
require.NoError(t, err)
213+
assert.Equal(t, "AlwaysOnSampler", sampler.Description())
214+
215+
otel.sampler = "probabilistic"
216+
otel.samplerParam = 0.5
217+
sampler, err = otel.initSampler()
218+
require.NoError(t, err)
219+
assert.Equal(t, "TraceIDRatioBased{0.5}", sampler.Description())
220+
221+
otel.sampler = "rateLimiting"
222+
otel.samplerParam = 100.25
223+
sampler, err = otel.initSampler()
224+
require.NoError(t, err)
225+
assert.Equal(t, "RateLimitingSampler{100.25}", sampler.Description())
226+
}

0 commit comments

Comments
 (0)