Skip to content

Commit e5f6ebd

Browse files
authored
Add e2e test case for OpenTelemetry monitoring. (#2246)
1 parent 46495fd commit e5f6ebd

9 files changed

+233
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# kuttl assertion step: runs the user-workload-monitoring readiness script.
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: ./tests/e2e-openshift/monitoring/check_user_workload_monitoring.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Cluster monitoring configuration that switches on monitoring for
# user-defined projects (enableUserWorkload) and user Alertmanager configs.
#
# Inspect the resulting pods with:
#   oc -n openshift-user-workload-monitoring get pod
# Reference:
#   https://docs.openshift.com/container-platform/4.13/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects

apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-config
  namespace: openshift-monitoring
data:
  # The value is itself a YAML document, passed through as a literal string.
  config.yaml: |
    enableUserWorkload: true
    alertmanagerMain:
      enableUserAlertmanagerConfig: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# Expected state of the collector Deployment: Prometheus scrape annotations on
# port 8888, the operator-managed labels, and one available/ready replica.
# NOTE(review): the status stanza suggests this is a kuttl assert file — confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    prometheus.io/path: /metrics
    # Quoted on purpose: annotation values must be strings.
    prometheus.io/port: "8888"
    prometheus.io/scrape: "true"
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector
status:
  availableReplicas: 1
  readyReplicas: 1
  replicas: 1
18+
19+
---
20+
# Expected ServiceMonitor for the collector: scrapes the Service port named
# "monitoring" on Services labelled as managed by the opentelemetry-operator.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
  name: cluster-collector-collector
spec:
  endpoints:
    - port: monitoring
  selector:
    matchLabels:
      app.kubernetes.io/managed-by: opentelemetry-operator
33+
34+
---
35+
# Expected ClusterIP Service exposing the collector's OTLP receivers:
# gRPC on 4317 and HTTP on 4318.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector
spec:
  ports:
    - appProtocol: grpc
      name: otlp-grpc
      port: 4317
      protocol: TCP
      targetPort: 4317
    - appProtocol: http
      name: otlp-http
      port: 4318
      protocol: TCP
      targetPort: 4318
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP
61+
62+
---
63+
# Expected "-headless" Service for the collector. It exposes the same OTLP
# ports (gRPC 4317, HTTP 4318) and carries the operator's headless-service
# marker label.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector
    app.kubernetes.io/part-of: opentelemetry
    operator.opentelemetry.io/collector-headless-service: Exists
  name: cluster-collector-collector-headless
spec:
  ports:
    - appProtocol: grpc
      name: otlp-grpc
      port: 4317
      protocol: TCP
      targetPort: 4317
    - appProtocol: http
      name: otlp-http
      port: 4318
      protocol: TCP
      targetPort: 4318
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP
90+
91+
---
92+
# Expected Service exposing the collector's own metrics on port 8888 under the
# port name "monitoring" (the name the ServiceMonitor endpoint refers to).
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/name: cluster-collector-collector-monitoring
    app.kubernetes.io/part-of: opentelemetry
  name: cluster-collector-collector-monitoring
spec:
  ports:
    - name: monitoring
      port: 8888
      protocol: TCP
      targetPort: 8888
  selector:
    app.kubernetes.io/component: opentelemetry-collector
    app.kubernetes.io/managed-by: opentelemetry-operator
    app.kubernetes.io/part-of: opentelemetry
  type: ClusterIP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Collector under test: deployment mode with metrics observability enabled.
# It receives OTLP traces (gRPC and HTTP) and sends them to the debug exporter.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: cluster-collector
spec:
  mode: deployment
  observability:
    metrics:
      enableMetrics: true
  # Collector configuration, passed through as a literal YAML string.
  config: |
    receivers:
      otlp:
        protocols:
          grpc:
          http:
    processors:
    exporters:
      debug:
    service:
      pipelines:
        traces:
          receivers: [otlp]
          processors: []
          exporters: [debug]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Expected state of the trace generator Job: one active pod.
# NOTE(review): the Job only runs telemetrygen for 5s, so `active: 1` may be
# missed if the pod finishes between polls — consider asserting
# `succeeded: 1` instead; confirm against the test harness timing.
apiVersion: batch/v1
kind: Job
metadata:
  name: telemetrygen-traces
status:
  active: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# One-shot Job that runs telemetrygen for 5 seconds at rate 1, sending traces
# over insecure OTLP/gRPC to the collector's headless Service on port 4317.
apiVersion: batch/v1
kind: Job
metadata:
  name: telemetrygen-traces
spec:
  completions: 1
  parallelism: 1
  template:
    metadata:
      labels:
        app: telemetrygen-traces
    spec:
      containers:
        - name: telemetrygen-traces
          # NOTE(review): ":latest" is unpinned, so test behavior can drift
          # with upstream releases — consider pinning a released tag.
          image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest
          command: ["./telemetrygen"]
          args:
            - "--otlp-endpoint=cluster-collector-collector-headless:4317"
            - "--otlp-insecure=true"
            - "--rate=1"
            - "--duration=5s"
            # Single-quoted so the embedded double quotes need no escaping.
            - '--otlp-attributes=telemetrygen="traces"'
            - '--otlp-header=telemetrygen="traces"'
            - "traces"
      restartPolicy: Never
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
# kuttl assertion step: runs the script that queries the collector metrics.
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: ./tests/e2e-openshift/monitoring/check_metrics.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
#
# Verifies that every OpenTelemetry collector metric listed below can be
# queried through the thanos-querier route in openshift-monitoring.
# Exits 1 on the first metric that returns no series, or when a prerequisite
# (bearer token, route host, parseable response) is missing.

# No `set -e`: every failure is checked explicitly so the message is specific.
set -uo pipefail

NAMESPACE="openshift-user-workload-monitoring"

# Prefer the auto-generated service-account token secret when one exists.
SECRET=$(oc get secret -n "$NAMESPACE" | grep prometheus-user-workload-token | head -n 1 | awk '{print $1}')
TOKEN=""
if [[ -n "$SECRET" ]]; then
  # `// empty` keeps a missing .data.token from being decoded as the text "null".
  TOKEN=$(oc get secret "$SECRET" -n "$NAMESPACE" -o json | jq -r '.data.token // empty' | base64 -d)
fi

# Fallback for clusters without such a secret: request a short-lived token.
# NOTE(review): assumes the service account is named prometheus-user-workload
# (inferred from the secret name prefix) — confirm.
if [[ -z "$TOKEN" ]]; then
  TOKEN=$(oc create token prometheus-user-workload -n "$NAMESPACE")
fi
if [[ -z "$TOKEN" ]]; then
  echo "Error: could not obtain a token for namespace $NAMESPACE." >&2
  exit 1
fi

THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host // empty')
if [[ -z "$THANOS_QUERIER_HOST" ]]; then
  echo "Error: route thanos-querier in openshift-monitoring has no host." >&2
  exit 1
fi

# Collector metrics that must be present. The original note pointed to
# https://issues.redhat.com/browse/TRACING-3399 for the metrics that are skipped.
metrics=(
  otelcol_exporter_enqueue_failed_spans
  otelcol_exporter_sent_spans
  otelcol_process_cpu_seconds
  otelcol_process_memory_rss
  otelcol_process_runtime_heap_alloc_bytes
  otelcol_process_runtime_total_alloc_bytes
  otelcol_process_runtime_total_sys_memory_bytes
  otelcol_process_uptime
  otelcol_receiver_accepted_spans
  otelcol_receiver_refused_spans
)

for metric in "${metrics[@]}"; do
  # -G with --data-urlencode sends a GET with a properly encoded query string;
  # -sS hides the progress meter but still prints transport errors.
  response=$(curl -sS -k -G \
    -H "Authorization: Bearer $TOKEN" \
    --data-urlencode "query=$metric" \
    "https://$THANOS_QUERIER_HOST/api/v1/query")

  count=$(jq -r '.data.result | length' <<<"$response" 2>/dev/null)

  # A non-numeric count means the response was not the expected JSON
  # (for example an auth or proxy error page), not that the metric is absent.
  if ! [[ $count =~ ^[0-9]+$ ]]; then
    echo "Error: unexpected response while querying '$metric': $response" >&2
    exit 1
  fi

  if [[ $count -eq 0 ]]; then
    echo "No metric '$metric' with value present. Exiting with status 1."
    exit 1
  else
    echo "Metric '$metric' with value is present."
  fi
done
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
#
# Checks that the user-workload-monitoring components each report at least one
# available, ready and total replica. Exits 1 on the first failing workload.

set -e

# check_replicas KIND NAME
#   KIND  resource kind passed to `oc get` (e.g. deployment, statefulset)
#   NAME  resource name in the openshift-user-workload-monitoring namespace
# Exits 1 if any of status.availableReplicas, status.readyReplicas or
# status.replicas is below 1 or is not a number.
check_replicas() {
  local status available ready total count

  # Comma-separated so that an omitted field stays visible as an empty slot.
  # The API leaves zero-valued replica counters out of .status, and a
  # whitespace-split list would silently skip them and let the check pass.
  status=$(oc get "$1" "$2" -n openshift-user-workload-monitoring \
    -o 'jsonpath={.status.availableReplicas},{.status.readyReplicas},{.status.replicas}')
  # `|| true`: read returns non-zero on empty input, which must not trip `set -e`.
  IFS=, read -r available ready total <<<"$status" || true

  # An omitted counter means zero.
  for count in "${available:-0}" "${ready:-0}" "${total:-0}"; do
    if [[ $count =~ ^[0-9]+$ ]]; then
      if ((count < 1)); then
        echo "The number of replicas is 0 for $1 $2"
        exit 1
      fi
    else
      echo "Error: Replica count is not a valid number for $1 $2"
      exit 1
    fi
  done
}

check_replicas deployment prometheus-operator
check_replicas statefulset prometheus-user-workload
check_replicas statefulset thanos-ruler-user-workload

0 commit comments

Comments
 (0)