Skip to content

Commit 8061478

Browse files
authored
Fix timestamp conversion in opamp bridge (#3582)
Kubernetes (and Go in general) allow for signed unix timestamps representing dates before 01-01-1970. However, OpAMP only accepts unsigned timestamps. Until now, opamp bridge simply assumed the conversion could always be carried out. This change instead returns errors when either the bridge or the respective K8s resources have negative Unix timestamps.
1 parent 193a64c commit 8061478

File tree

4 files changed

+83
-34
lines changed

4 files changed

+83
-34
lines changed

cmd/operator-opamp-bridge/agent/agent.go

+55-10
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package agent
1717
import (
1818
"bytes"
1919
"context"
20+
"errors"
2021
"fmt"
2122
"strings"
2223
"time"
@@ -95,10 +96,18 @@ func (agent *Agent) getHealth() *protobufs.ComponentHealth {
9596
LastError: err.Error(),
9697
}
9798
}
99+
statusTime, err := agent.getCurrentTimeUnixNano()
100+
if err != nil {
101+
return &protobufs.ComponentHealth{
102+
Healthy: false,
103+
StartTimeUnixNano: agent.startTime,
104+
LastError: err.Error(),
105+
}
106+
}
98107
return &protobufs.ComponentHealth{
99108
Healthy: true,
100109
StartTimeUnixNano: agent.startTime,
101-
StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()),
110+
StatusTimeUnixNano: statusTime,
102111
LastError: "",
103112
ComponentHealthMap: healthMap,
104113
}
@@ -124,9 +133,17 @@ func (agent *Agent) generateCollectorPoolHealth() (map[string]*protobufs.Compone
124133
for _, pod := range podMap {
125134
isPoolHealthy = isPoolHealthy && pod.Healthy
126135
}
136+
podStartTime, err := timeToUnixNanoUnsigned(col.ObjectMeta.GetCreationTimestamp().Time)
137+
if err != nil {
138+
return nil, err
139+
}
140+
statusTime, err := agent.getCurrentTimeUnixNano()
141+
if err != nil {
142+
return nil, err
143+
}
127144
healthMap[key.String()] = &protobufs.ComponentHealth{
128-
StartTimeUnixNano: uint64(col.ObjectMeta.GetCreationTimestamp().UnixNano()),
129-
StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()),
145+
StartTimeUnixNano: podStartTime,
146+
StatusTimeUnixNano: statusTime,
130147
Status: col.Status.Scale.StatusReplicas,
131148
ComponentHealthMap: podMap,
132149
Healthy: isPoolHealthy,
@@ -158,6 +175,10 @@ func (agent *Agent) getCollectorSelector(col v1beta1.OpenTelemetryCollector) map
158175
}
159176

160177
func (agent *Agent) generateCollectorHealth(selectorLabels map[string]string, namespace string) (map[string]*protobufs.ComponentHealth, error) {
178+
statusTime, err := agent.getCurrentTimeUnixNano()
179+
if err != nil {
180+
return nil, err
181+
}
161182
pods, err := agent.applier.GetCollectorPods(selectorLabels, namespace)
162183
if err != nil {
163184
return nil, err
@@ -169,15 +190,18 @@ func (agent *Agent) generateCollectorHealth(selectorLabels map[string]string, na
169190
if item.Status.Phase != "Running" {
170191
healthy = false
171192
}
172-
var startTime int64
193+
var startTime uint64
173194
if item.Status.StartTime != nil {
174-
startTime = item.Status.StartTime.UnixNano()
195+
startTime, err = timeToUnixNanoUnsigned(item.Status.StartTime.Time)
196+
if err != nil {
197+
return nil, err
198+
}
175199
} else {
176200
healthy = false
177201
}
178202
healthMap[key.String()] = &protobufs.ComponentHealth{
179-
StartTimeUnixNano: uint64(startTime),
180-
StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()),
203+
StartTimeUnixNano: startTime,
204+
StatusTimeUnixNano: statusTime,
181205
Status: string(item.Status.Phase),
182206
Healthy: healthy,
183207
}
@@ -197,7 +221,7 @@ func (agent *Agent) onConnectFailed(ctx context.Context, err error) {
197221

198222
// onError is called when an agent receives an error response from the server.
199223
func (agent *Agent) onError(ctx context.Context, err *protobufs.ServerErrorResponse) {
200-
agent.logger.Error(fmt.Errorf(err.GetErrorMessage()), "server returned an error response")
224+
agent.logger.Error(errors.New(err.GetErrorMessage()), "server returned an error response")
201225
}
202226

203227
// saveRemoteConfigStatus receives a status from the server when the server sets a remote configuration.
@@ -207,7 +231,11 @@ func (agent *Agent) saveRemoteConfigStatus(_ context.Context, status *protobufs.
207231

208232
// Start sets up the callbacks for the OpAMP client and begins the client's connection to the server.
209233
func (agent *Agent) Start() error {
210-
agent.startTime = uint64(agent.clock.Now().UnixNano())
234+
startTime, err := agent.getCurrentTimeUnixNano()
235+
if err != nil {
236+
return err
237+
}
238+
agent.startTime = startTime
211239
settings := types.StartSettings{
212240
OpAMPServerURL: agent.config.Endpoint,
213241
Header: agent.config.Headers.ToHTTPHeader(),
@@ -224,7 +252,7 @@ func (agent *Agent) Start() error {
224252
PackagesStateProvider: nil,
225253
Capabilities: agent.config.GetCapabilities(),
226254
}
227-
err := agent.opampClient.SetAgentDescription(agent.agentDescription)
255+
err = agent.opampClient.SetAgentDescription(agent.agentDescription)
228256
if err != nil {
229257
return err
230258
}
@@ -429,3 +457,20 @@ func (agent *Agent) onMessage(ctx context.Context, msg *types.MessageData) {
429457
agent.initMeter(msg.OwnMetricsConnSettings)
430458
}
431459
}
460+
461+
// getCurrentTimeUnixNano returns the agent clock's current time as unsigned Unix nanoseconds, which the protocol
// expects, or an error if that time cannot be represented as a non-negative value.
462+
func (agent *Agent) getCurrentTimeUnixNano() (uint64, error) {
463+
// technically this could be negative if the system time is set to before 1970-01-01
464+
// the proto demands this to be a nonnegative number, so in that case timeToUnixNanoUnsigned returns an error
465+
return timeToUnixNanoUnsigned(agent.clock.Now())
466+
}
467+
468+
// timeToUnixNanoUnsigned returns the number of nanoseconds elapsed from 1970-01-01T00:00:00Z to t as a uint64,
// which is the representation OpAMP expects.
//
// It returns an error when t is before the Unix epoch (OpAMP cannot express a negative timestamp) or when t is so
// far in the future that the nanosecond count does not fit in an int64 (after the year 2262). The bounds are checked
// on the time.Time itself rather than on the sign of t.UnixNano(), because UnixNano is documented to be undefined
// outside the years 1678-2262 and may wrap around to a plausible-looking positive value.
func timeToUnixNanoUnsigned(t time.Time) (uint64, error) {
	epoch := time.Unix(0, 0)
	if t.Before(epoch) {
		return 0, fmt.Errorf("invalid timestamp, must not be before 1970-01-01 due to OpAMP requirements: %v", t)
	}
	// Largest instant whose nanosecond offset from the epoch still fits in an int64.
	latest := time.Unix(0, 1<<63-1)
	if t.After(latest) {
		return 0, fmt.Errorf("invalid timestamp, too far in the future to be expressed in Unix nanoseconds: %v", t)
	}
	// Within [epoch, latest] UnixNano is well defined and non-negative, so the conversion is lossless.
	return uint64(t.UnixNano()), nil
}

cmd/operator-opamp-bridge/agent/agent_test.go

+26-24
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,7 @@ const (
6363
agentTestFileBatchNotAllowedName = "testdata/agentbatchnotallowed.yaml"
6464
agentTestFileNoProcessorsAllowedName = "testdata/agentnoprocessorsallowed.yaml"
6565

66-
// collectorStartTime is set to the result of a zero'd out creation timestamp
67-
// read more here https://github.com/open-telemetry/opentelemetry-go/issues/4268
68-
// we could attempt to hack the creation timestamp, but this is a constant and far easier.
69-
collectorStartTime = uint64(11651379494838206464)
66+
collectorStartTime = uint64(0)
7067
)
7168

7269
var (
@@ -78,8 +75,9 @@ var (
7875
updatedYamlConfigHash = getConfigHash(testCollectorKey, collectorUpdatedFile)
7976
otherUpdatedYamlConfigHash = getConfigHash(otherCollectorKey, collectorUpdatedFile)
8077

81-
podTime = metav1.NewTime(time.UnixMicro(1704748549000000))
82-
mockPodList = &v1.PodList{
78+
podTime = metav1.NewTime(time.Unix(0, 0))
79+
podTimeUnsigned, _ = timeToUnixNanoUnsigned(podTime.Time)
80+
mockPodList = &v1.PodList{
8381
TypeMeta: metav1.TypeMeta{
8482
Kind: "PodList",
8583
APIVersion: "v1",
@@ -95,6 +93,7 @@ var (
9593
"app.kubernetes.io/part-of": "opentelemetry",
9694
"app.kubernetes.io/component": "opentelemetry-collector",
9795
},
96+
CreationTimestamp: podTime,
9897
},
9998
Spec: v1.PodSpec{},
10099
Status: v1.PodStatus{
@@ -119,6 +118,7 @@ var (
119118
"app.kubernetes.io/part-of": "opentelemetry",
120119
"app.kubernetes.io/component": "opentelemetry-collector",
121120
},
121+
CreationTimestamp: podTime,
122122
},
123123
Spec: v1.PodSpec{},
124124
Status: v1.PodStatus{
@@ -215,6 +215,8 @@ func getFakeApplier(t *testing.T, conf *config.Config, lists ...runtimeClient.Ob
215215

216216
func TestAgent_getHealth(t *testing.T) {
217217
fakeClock := testingclock.NewFakeClock(time.Now())
218+
startTime, err := timeToUnixNanoUnsigned(fakeClock.Now())
219+
require.NoError(t, err)
218220
type fields struct {
219221
configFile string
220222
}
@@ -244,10 +246,10 @@ func TestAgent_getHealth(t *testing.T) {
244246
want: []*protobufs.ComponentHealth{
245247
{
246248
Healthy: true,
247-
StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
249+
StartTimeUnixNano: startTime,
248250
LastError: "",
249251
Status: "",
250-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
252+
StatusTimeUnixNano: startTime,
251253
ComponentHealthMap: map[string]*protobufs.ComponentHealth{},
252254
},
253255
},
@@ -269,15 +271,15 @@ func TestAgent_getHealth(t *testing.T) {
269271
want: []*protobufs.ComponentHealth{
270272
{
271273
Healthy: true,
272-
StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
273-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
274+
StartTimeUnixNano: startTime,
275+
StatusTimeUnixNano: startTime,
274276
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
275277
"testnamespace/collector": {
276278
Healthy: true,
277279
StartTimeUnixNano: collectorStartTime,
278280
LastError: "",
279281
Status: "",
280-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
282+
StatusTimeUnixNano: startTime,
281283
ComponentHealthMap: map[string]*protobufs.ComponentHealth{},
282284
},
283285
},
@@ -302,23 +304,23 @@ func TestAgent_getHealth(t *testing.T) {
302304
want: []*protobufs.ComponentHealth{
303305
{
304306
Healthy: true,
305-
StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
306-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
307+
StartTimeUnixNano: startTime,
308+
StatusTimeUnixNano: startTime,
307309
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
308310
"testnamespace/collector": {
309311
Healthy: true,
310312
StartTimeUnixNano: collectorStartTime,
311313
LastError: "",
312314
Status: "",
313-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
315+
StatusTimeUnixNano: startTime,
314316
ComponentHealthMap: map[string]*protobufs.ComponentHealth{},
315317
},
316318
"testnamespace/other": {
317319
Healthy: true,
318320
StartTimeUnixNano: collectorStartTime,
319321
LastError: "",
320322
Status: "",
321-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
323+
StatusTimeUnixNano: startTime,
322324
ComponentHealthMap: map[string]*protobufs.ComponentHealth{},
323325
},
324326
},
@@ -342,21 +344,21 @@ func TestAgent_getHealth(t *testing.T) {
342344
want: []*protobufs.ComponentHealth{
343345
{
344346
Healthy: true,
345-
StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
346-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
347+
StartTimeUnixNano: startTime,
348+
StatusTimeUnixNano: startTime,
347349
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
348350
"other/third": {
349351
Healthy: true,
350352
StartTimeUnixNano: collectorStartTime,
351353
LastError: "",
352354
Status: "",
353-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
355+
StatusTimeUnixNano: startTime,
354356
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
355357
otherCollectorName + "/" + thirdCollectorName + "-1": {
356358
Healthy: true,
357359
Status: "Running",
358-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
359-
StartTimeUnixNano: uint64(podTime.UnixNano()),
360+
StatusTimeUnixNano: startTime,
361+
StartTimeUnixNano: podTimeUnsigned,
360362
},
361363
},
362364
},
@@ -381,20 +383,20 @@ func TestAgent_getHealth(t *testing.T) {
381383
want: []*protobufs.ComponentHealth{
382384
{
383385
Healthy: true,
384-
StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
385-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
386+
StartTimeUnixNano: startTime,
387+
StatusTimeUnixNano: startTime,
386388
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
387389
"other/third": {
388390
Healthy: false, // we're working with mocks so the status will never be reconciled.
389391
StartTimeUnixNano: collectorStartTime,
390392
LastError: "",
391393
Status: "",
392-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
394+
StatusTimeUnixNano: startTime,
393395
ComponentHealthMap: map[string]*protobufs.ComponentHealth{
394396
otherCollectorName + "/" + thirdCollectorName + "-1": {
395397
Healthy: false,
396398
Status: "Running",
397-
StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()),
399+
StatusTimeUnixNano: startTime,
398400
StartTimeUnixNano: uint64(0),
399401
},
400402
},

cmd/operator-opamp-bridge/agent/testdata/basic.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ metadata:
44
name: simplest
55
labels:
66
"opentelemetry.io/opamp-managed": "true"
7+
creationTimestamp: "1970-01-01T00:00:00Z"
78
spec:
89
config:
910
receivers:

cmd/operator-opamp-bridge/agent/testdata/updated.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ metadata:
44
name: simplest
55
labels:
66
"opentelemetry.io/opamp-managed": "test-bridge"
7+
creationTimestamp: "1970-01-01T00:00:00Z"
78
spec:
89
config:
910
receivers:

0 commit comments

Comments
 (0)