Skip to content

Commit a7fa98b

Browse files
committed
Merge branch 'traceparent'
2 parents d1704f5 + 4b017a4 commit a7fa98b

File tree

5 files changed

+39
-191
lines changed

5 files changed

+39
-191
lines changed

cmd/deploy/main.go

+18 -2
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ import (
99
"github.com/nais/deploy/pkg/pb"
1010
"github.com/nais/deploy/pkg/telemetry"
1111
"github.com/nais/deploy/pkg/version"
12+
"go.opentelemetry.io/otel/attribute"
13+
otrace "go.opentelemetry.io/otel/trace"
1214
"google.golang.org/protobuf/encoding/protojson"
1315

1416
log "github.com/sirupsen/logrus"
@@ -52,15 +54,29 @@ func run() error {
5254
}
5355
}()
5456

57+
// Inherit traceparent from pipeline, if any
58+
ctx := telemetry.WithTraceParent(programContext, cfg.Traceparent)
59+
ctx, span := telemetry.Tracer().Start(ctx, "NAIS deploy", otrace.WithSpanKind(otrace.SpanKindClient))
60+
defer span.End()
61+
62+
span.SetAttributes(attribute.KeyValue{
63+
Key: "deploy.client.version",
64+
Value: attribute.StringValue(version.Version()),
65+
})
66+
5567
// Welcome
5668
log.Infof("NAIS deploy %s", version.Version())
5769
ts, err := version.BuildTime()
5870
if err == nil {
71+
span.SetAttributes(attribute.KeyValue{
72+
Key: "deploy.client.build-time",
73+
Value: attribute.StringValue(ts.Local().String()),
74+
})
5975
log.Infof("This version was built %s", ts.Local())
6076
}
6177

6278
// Prepare request
63-
request, err := deployclient.Prepare(programContext, cfg)
79+
request, err := deployclient.Prepare(ctx, cfg)
6480
if err != nil {
6581
return err
6682
}
@@ -89,5 +105,5 @@ func run() error {
89105
return nil
90106
}
91107

92-
return d.Deploy(programContext, cfg, request)
108+
return d.Deploy(ctx, cfg, request)
93109
}

pkg/deployclient/config.go

+2 -10
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,11 @@ package deployclient
22

33
import (
44
"encoding/hex"
5-
"fmt"
65
"os"
76
"strconv"
87
"strings"
98
"time"
109

11-
"github.com/nais/deploy/pkg/telemetry"
1210
flag "github.com/spf13/pflag"
1311
)
1412

@@ -32,8 +30,7 @@ type Config struct {
3230
Retry bool
3331
RetryInterval time.Duration
3432
Team string
35-
TelemetryInput string
36-
Telemetry *telemetry.PipelineTimings
33+
Traceparent string
3734
Timeout time.Duration
3835
TracingDashboardURL string
3936
OpenTelemetryCollectorURL string
@@ -61,7 +58,7 @@ func InitConfig(cfg *Config) {
6158
flag.BoolVar(&cfg.Retry, "retry", getEnvBool("RETRY", true), "Retry deploy when encountering transient errors. (env RETRY)")
6259
flag.StringVar(&cfg.Team, "team", os.Getenv("TEAM"), "Team making the deployment. Auto-detected from nais.yaml if possible. (env TEAM)")
6360
flag.StringVar(&cfg.OpenTelemetryCollectorURL, "otel-collector-endpoint", getEnv("OTEL_COLLECTOR_ENDPOINT", DefaultOtelCollectorEndpoint), "OpenTelemetry collector endpoint. (env OTEL_COLLECTOR_ENDPOINT)")
64-
flag.StringVar(&cfg.TelemetryInput, "telemetry", os.Getenv("TELEMETRY"), "Telemetry data from CI pipeline. (env TELEMETRY)")
61+
flag.StringVar(&cfg.Traceparent, "traceparent", os.Getenv("TRACEPARENT"), "The W3C Trace Context traceparent value for the workflow run. (env TRACEPARENT)")
6562
flag.DurationVar(&cfg.Timeout, "timeout", getEnvDuration("TIMEOUT", DefaultDeployTimeout), "Time to wait for successful deployment. (env TIMEOUT)")
6663
flag.StringVar(&cfg.TracingDashboardURL, "tracing-dashboard-url", getEnv("TRACING_DASHBOARD_URL", DefaultTracingDashboardURL), "Base URL to Grafana tracing dashboard onto which the trace ID can be appended (env TRACING_DASHBOARD_URL)")
6764
flag.StringSliceVar(&cfg.Variables, "var", getEnvStringSlice("VAR"), "Template variable in the form KEY=VALUE. Can be specified multiple times. (env VAR)")
@@ -138,10 +135,5 @@ func (cfg *Config) Validate() error {
138135
return ErrMalformedAPIKey
139136
}
140137

141-
cfg.Telemetry, err = telemetry.ParsePipelineTelemetry(cfg.TelemetryInput)
142-
if err != nil {
143-
return fmt.Errorf("%w: %w", ErrInvalidTelemetryFormat, err)
144-
}
145-
146138
return nil
147139
}

pkg/deployclient/deployclient.go

+16 -11
Original file line number | Diff line number | Diff line change
@@ -161,43 +161,43 @@ func (d *Deployer) Deploy(ctx context.Context, cfg *Config, deployRequest *pb.De
161161

162162
// Root span for tracing.
163163
// All sub-spans must be created from this context.
164-
ctx, rootSpan := cfg.Telemetry.StartTracing(ctx)
165-
defer rootSpan.End()
164+
ctx, span := telemetry.Tracer().Start(ctx, "Send deploy request and wait for completion")
165+
defer span.End()
166166
deployRequest.TraceParent = telemetry.TraceParentHeader(ctx)
167167

168168
log.Infof("Sending deployment request to NAIS deploy at %s...", cfg.DeployServerURL)
169169

170170
sendDeploymentRequest := func() error {
171-
ctx, span := telemetry.Tracer().Start(ctx, "Send to deploy server")
172-
defer span.End()
171+
requestContext, requestSpan := telemetry.Tracer().Start(ctx, "Send to deploy server")
172+
defer requestSpan.End()
173173

174174
err = retryUnavailable(cfg.RetryInterval, cfg.Retry, func() error {
175-
deployStatus, err = d.Client.Deploy(ctx, deployRequest)
175+
deployStatus, err = d.Client.Deploy(requestContext, deployRequest)
176176
return err
177177
})
178178

179179
if err != nil {
180180
code := grpcErrorCode(err)
181181
err = fmt.Errorf(formatGrpcError(err))
182-
if ctx.Err() != nil {
183-
span.SetStatus(ocodes.Error, ctx.Err().Error())
184-
return Errorf(ExitTimeout, "deployment timed out: %s", ctx.Err())
182+
if requestContext.Err() != nil {
183+
requestSpan.SetStatus(ocodes.Error, requestContext.Err().Error())
184+
return Errorf(ExitTimeout, "deployment timed out: %s", requestContext.Err())
185185
}
186186
if code == codes.Unauthenticated {
187187
if !strings.HasSuffix(cfg.Environment, ":"+cfg.Team) {
188188
log.Warnf("hint: team %q does not match namespace in %q", cfg.Team, cfg.Environment)
189189
}
190190
}
191-
span.SetStatus(ocodes.Error, err.Error())
191+
requestSpan.SetStatus(ocodes.Error, err.Error())
192192
return ErrorWrap(ExitNoDeployment, err)
193193
}
194194

195195
log.Infof("Deployment request accepted by NAIS deploy and dispatched to cluster '%s'.", deployStatus.GetRequest().GetCluster())
196196

197197
deployRequest.ID = deployStatus.GetRequest().GetID()
198-
telemetry.AddDeploymentRequestSpanAttributes(rootSpan, deployStatus.GetRequest())
199198
telemetry.AddDeploymentRequestSpanAttributes(span, deployStatus.GetRequest())
200-
traceID := telemetry.TraceID(ctx)
199+
telemetry.AddDeploymentRequestSpanAttributes(requestSpan, deployStatus.GetRequest())
200+
traceID := telemetry.TraceID(requestContext)
201201

202202
urlPrefix := "https://" + strings.Split(cfg.DeployServerURL, ":")[0]
203203
log.Infof("Deployment information:")
@@ -214,7 +214,12 @@ func (d *Deployer) Deploy(ctx context.Context, cfg *Config, deployRequest *pb.De
214214
}
215215

216216
err = sendDeploymentRequest()
217+
218+
// First handle errors that might have occurred with the request itself.
219+
// Errors from underlying systems are handled later.
217220
if err != nil {
221+
span.SetStatus(ocodes.Error, err.Error())
222+
span.RecordError(err)
218223
return err
219224
}
220225

pkg/telemetry/telemetry.go

+3 -96
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,7 @@ package telemetry
44

55
import (
66
"context"
7-
"fmt"
87
"runtime"
9-
"strconv"
108
"strings"
119
"time"
1210

@@ -124,104 +122,13 @@ func AddDeploymentRequestSpanAttributes(span otrace.Span, request *pb.Deployment
124122
}, attribute.KeyValue{
125123
Key: "deploy.repository",
126124
Value: attribute.StringValue(request.GetRepository().FullName()),
125+
}, attribute.KeyValue{
126+
Key: "deploy.deadline",
127+
Value: attribute.StringValue(request.GetDeadline().AsTime().Local().Format(time.RFC3339)),
127128
},
128129
)
129130
}
130131

131-
// Holds timestamps from pipeline indicating when certain steps were started or finished.
132-
// If `Validate()` returns nil, this object is safe to use and contains chronologically ordered timestamps
133-
// for every field.
134-
type PipelineTimings struct {
135-
LatestCommit time.Time
136-
Start time.Time
137-
BuildStart time.Time
138-
AttestStart time.Time
139-
End time.Time
140-
}
141-
142-
func (pt *PipelineTimings) Validate() error {
143-
if pt.LatestCommit.After(pt.BuildStart) || pt.Start.After(pt.BuildStart) || pt.BuildStart.After(pt.AttestStart) || pt.AttestStart.After(pt.End) {
144-
return fmt.Errorf("pipeline timings are not in expected chronological order, ensure that: latest_commit < pipeline_start < build_start < attest_start < pipeline_end")
145-
}
146-
return nil
147-
}
148-
149-
func (pt *PipelineTimings) StartTracing(ctx context.Context) (context.Context, otrace.Span) {
150-
if pt == nil {
151-
return Tracer().Start(ctx, "Continuous integration pipeline", otrace.WithSpanKind(otrace.SpanKindClient))
152-
}
153-
154-
rootCtx, rootSpan := Tracer().Start(ctx, "Continuous integration pipeline", otrace.WithTimestamp(pt.LatestCommit), otrace.WithSpanKind(otrace.SpanKindClient))
155-
rootSpan.AddEvent("Latest commit to repository", otrace.WithTimestamp(pt.LatestCommit))
156-
{
157-
ciCtx, ciSpan := Tracer().Start(rootCtx, "Github Action: docker-build-push", otrace.WithTimestamp(pt.Start), otrace.WithSpanKind(otrace.SpanKindClient))
158-
{
159-
_, buildSpan := Tracer().Start(ciCtx, "Docker: Build and push", otrace.WithTimestamp(pt.BuildStart))
160-
buildSpan.End(otrace.WithTimestamp(pt.AttestStart))
161-
}
162-
{
163-
_, attestSpan := Tracer().Start(ciCtx, "SLSA: SBOM sign and attest", otrace.WithTimestamp(pt.AttestStart))
164-
attestSpan.End(otrace.WithTimestamp(pt.End))
165-
}
166-
ciSpan.End(otrace.WithTimestamp(pt.End))
167-
}
168-
169-
return rootCtx, rootSpan
170-
}
171-
172-
// Parse pipeline build timings.
173-
//
174-
// Uses the following input format:
175-
//
176-
// latest_commit=1726040395,pipeline_start=1726050395,pipeline_end=1726050512,build_start=1726050400,attest_start=1726050492
177-
//
178-
// This output usually comes from `docker-build-push.steps.output.telemetry`.
179-
//
180-
// If there is no timing data, both return values will be nil.
181-
// If all timing data is valid, returns a timings object and nil error.
182-
func ParsePipelineTelemetry(s string) (*PipelineTimings, error) {
183-
if len(s) == 0 {
184-
return nil, nil
185-
}
186-
187-
timings := &PipelineTimings{}
188-
fragments := strings.Split(s, ",")
189-
for _, keyValue := range fragments {
190-
key, value, found := strings.Cut(keyValue, "=")
191-
if !found {
192-
return nil, fmt.Errorf("expected 'key=value', found '%s'", keyValue)
193-
}
194-
195-
epoch, err := strconv.Atoi(value)
196-
if err != nil {
197-
return nil, fmt.Errorf("expected UNIX epoch, found '%s'", value)
198-
}
199-
200-
ts := time.Unix(int64(epoch), 0)
201-
ts = ts.UTC()
202-
203-
switch key {
204-
case "latest_commit":
205-
timings.LatestCommit = ts
206-
case "pipeline_start":
207-
timings.Start = ts
208-
case "pipeline_end":
209-
timings.End = ts
210-
case "build_start":
211-
timings.BuildStart = ts
212-
case "attest_start":
213-
timings.AttestStart = ts
214-
default:
215-
return nil, fmt.Errorf("expected key to be one of 'latest_commit', 'pipeline_start', 'pipeline_end', 'build_start', 'attest_start'; found '%s'", key)
216-
}
217-
}
218-
err := timings.Validate()
219-
if err != nil {
220-
return nil, err
221-
}
222-
return timings, nil
223-
}
224-
225132
func newPropagator() propagation.TextMapPropagator {
226133
return propagation.NewCompositeTextMapPropagator(
227134
propagation.TraceContext{},

pkg/telemetry/telemetry_test.go

-72
Original file line number | Diff line number | Diff line change
@@ -3,83 +3,11 @@ package telemetry_test
33
import (
44
"context"
55
"testing"
6-
"time"
76

87
"github.com/nais/deploy/pkg/telemetry"
98
"github.com/stretchr/testify/assert"
109
)
1110

12-
func TestParsePipelineTelemetry(t *testing.T) {
13-
t.Run("default case with five timings in correct order without quoting", func(t *testing.T) {
14-
input := "latest_commit=1726040395,pipeline_start=1726050395,pipeline_end=1726050512,build_start=1726050400,attest_start=1726050492"
15-
expected := &telemetry.PipelineTimings{
16-
LatestCommit: time.Date(2024, time.September, 11, 7, 39, 55, 0, time.UTC),
17-
Start: time.Date(2024, time.September, 11, 10, 26, 35, 0, time.UTC),
18-
BuildStart: time.Date(2024, time.September, 11, 10, 26, 40, 0, time.UTC),
19-
AttestStart: time.Date(2024, time.September, 11, 10, 28, 12, 0, time.UTC),
20-
End: time.Date(2024, time.September, 11, 10, 28, 32, 0, time.UTC),
21-
}
22-
output, err := telemetry.ParsePipelineTelemetry(input)
23-
assert.NoError(t, err)
24-
assert.Equal(t, expected, output)
25-
})
26-
27-
t.Run("missing some of the timings", func(t *testing.T) {
28-
input := "pipeline_start=1726050395,pipeline_end=1726050512"
29-
output, err := telemetry.ParsePipelineTelemetry(input)
30-
assert.EqualError(t, err, "pipeline timings are not in expected chronological order, ensure that: latest_commit < pipeline_start < build_start < attest_start < pipeline_end")
31-
assert.Nil(t, output)
32-
})
33-
34-
t.Run("wrong timing order", func(t *testing.T) {
35-
for _, input := range []string{
36-
"pipeline_start=2,build_start=1",
37-
"build_start=2,attest_start=1",
38-
"attest_start=2,pipeline_end=1",
39-
"pipeline_start=2,pipeline_end=1",
40-
} {
41-
output, err := telemetry.ParsePipelineTelemetry(input)
42-
assert.EqualError(t, err, "pipeline timings are not in expected chronological order, ensure that: latest_commit < pipeline_start < build_start < attest_start < pipeline_end")
43-
assert.Nil(t, output)
44-
}
45-
})
46-
47-
t.Run("unexpected timing parameter", func(t *testing.T) {
48-
input := "pipeline_start=1,foobar=2"
49-
output, err := telemetry.ParsePipelineTelemetry(input)
50-
assert.EqualError(t, err, "expected key to be one of 'latest_commit', 'pipeline_start', 'pipeline_end', 'build_start', 'attest_start'; found 'foobar'")
51-
assert.Nil(t, output)
52-
})
53-
54-
t.Run("timing parameter not an integer", func(t *testing.T) {
55-
input := "pipeline_start=2024-09-11"
56-
output, err := telemetry.ParsePipelineTelemetry(input)
57-
assert.EqualError(t, err, "expected UNIX epoch, found '2024-09-11'")
58-
assert.Nil(t, output)
59-
})
60-
61-
t.Run("parameter list missing value", func(t *testing.T) {
62-
input := "pipeline_start=1,pipeline_end"
63-
output, err := telemetry.ParsePipelineTelemetry(input)
64-
assert.EqualError(t, err, "expected 'key=value', found 'pipeline_end'")
65-
assert.Nil(t, output)
66-
})
67-
68-
t.Run("parameter list missing key", func(t *testing.T) {
69-
input := "pipeline_start=1,=2"
70-
output, err := telemetry.ParsePipelineTelemetry(input)
71-
assert.EqualError(t, err, "expected key to be one of 'latest_commit', 'pipeline_start', 'pipeline_end', 'build_start', 'attest_start'; found ''")
72-
assert.Nil(t, output)
73-
})
74-
75-
t.Run("no data", func(t *testing.T) {
76-
input := ""
77-
output, err := telemetry.ParsePipelineTelemetry(input)
78-
assert.NoError(t, err)
79-
assert.Nil(t, output)
80-
})
81-
}
82-
8311
func TestTraceID(t *testing.T) {
8412
t.Run("happy case", func(t *testing.T) {
8513
traceParentHeader := "00-ada6313c1a5b6ffdf0d085fadc3265cb-6018288557ffff51-01"

0 commit comments

Comments
 (0)