Skip to content

Commit 218ef37

Browse files
committed
Fikser alerts.
1 parent 6baf132 commit 218ef37

File tree

2 files changed

+21
-19
lines changed

2 files changed

+21
-19
lines changed

nais/alerterator-dev.yml

+7 -8
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ spec:
1818
summary: "App \{{ $labels.deployment }} er nede i namespace \{{ $labels.namespace }}"
1919
labels:
2020
namespace: {{namespace}}
21-
severity: danger
21+
severity: critical
2222

2323
- alert: Høy andel error i logger
2424
expr: sum by (app, container, pod, namespace) (floor(increase(logback_events_total{app="{{app}}", level="error"} [3m]))) > 0
@@ -28,7 +28,7 @@ spec:
2828
summary: "Høy andel error i logger for app \{{ $labels.app }} feiler med \{{ $labels.exception }} i namespace \{{ $labels.namespace }}"
2929
labels:
3030
namespace: {{namespace}}
31-
severity: danger
31+
severity: critical
3232

3333
- alert: Høy andel warning i logger
3434
expr: sum by (app, container, pod, namespace) (floor(increase(logback_events_total{app="{{app}}", level="warning"} [3m]))) > 0
@@ -50,10 +50,9 @@ spec:
5050
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
5151
labels:
5252
namespace: {{namespace}}
53-
severity: danger
53+
severity: critical
5454

5555
- alert: Høy andel HTTP klientfeil (4xx responser)
56-
severity: danger
5756
expr: floor(increase(http_server_requests_seconds_count{status=~"4.*", status!~"404|401|403", app="{{app}}"}[3m])) > 0
5857
for: 1m
5958
annotations:
@@ -63,7 +62,7 @@ spec:
6362
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
6463
labels:
6564
namespace: {{namespace}}
66-
severity: danger
65+
severity: critical
6766

6867
- alert: Konsumering av meldinger feiler
6968
expr: ceil(increase(spring_kafka_listener_seconds_count{result="failure", app="{{app}}"}[3m])) > 0
@@ -73,7 +72,7 @@ spec:
7372
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
7473
labels:
7574
namespace: {{namespace}}
76-
severity: danger
75+
severity: critical
7776

7877
- alert: Publisering av meldinger feiler
7978
expr: ceil(increase(spring_kafka_template_seconds_count{result="failure", app="{{app}}"}[3m])) > 0
@@ -83,7 +82,7 @@ spec:
8382
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
8483
labels:
8584
namespace: {{namespace}}
86-
severity: danger
85+
severity: critical
8786

8887
- alert: Helsesjekk feiler
8988
expr: floor(increase(http_server_requests_seconds_count{status!~"200", uri="/actuator/health", app="{{app}}"}[3m])) > 0
@@ -93,4 +92,4 @@ spec:
9392
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
9493
labels:
9594
namespace: {{namespace}}
96-
severity: danger
95+
severity: critical

nais/alerterator-prod.yml

+14 -11
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ spec:
1818
summary: "App \{{ $labels.deployment }} er nede i namespace \{{ $labels.namespace }}"
1919
labels:
2020
namespace: {{namespace}}
21-
severity: danger
21+
severity: critical
2222

2323
- alert: Høy andel error i logger
2424
expr: sum by (app, container, pod, namespace) (floor(increase(logback_events_total{app="{{app}}", level="error"} [3m]))) > 0
@@ -28,7 +28,7 @@ spec:
2828
summary: "Høy andel error i logger for app \{{ $labels.app }} feiler med \{{ $labels.exception }} i namespace \{{ $labels.namespace }}"
2929
labels:
3030
namespace: {{namespace}}
31-
severity: danger
31+
severity: critical
3232

3333
- alert: Høy andel warning i logger
3434
expr: sum by (app, container, pod, namespace) (floor(increase(logback_events_total{app="{{app}}", level="warning"} [3m]))) > 0
@@ -50,16 +50,19 @@ spec:
5050
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
5151
labels:
5252
namespace: {{namespace}}
53-
severity: danger
53+
severity: critical
5454

5555
- alert: Høy andel HTTP klientfeil (4xx responser)
56-
severity: danger
5756
expr: floor(increase(http_server_requests_seconds_count{status=~"4.*", status!~"404|401|403|451", app="{{app}}"}[3m])) > 0
5857
for: 1m
59-
description: "Følgende request feilet: `Status \{{ $labels.status }} - \{{ $labels.method }} \{{ $labels.route }}`.\n
60-
Grunn:\n ```\{{ $labels.problem_details }}```\n
61-
Sjekk loggene for å se hvorfor dette feiler"
62-
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
58+
annotations:
59+
summary: "Følgende request feilet: `Status \{{ $labels.status }} - \{{ $labels.method }} \{{ $labels.route }}`.\n
60+
Grunn:\n ```\{{ $labels.problem_details }}```\n
61+
Sjekk loggene for å se hvorfor dette feiler."
62+
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
63+
labels:
64+
namespace: {{namespace}}
65+
severity: critical
6366

6467
- alert: Konsumering av meldinger feiler
6568
expr: ceil(increase(spring_kafka_listener_seconds_count{result="failure", app="{{app}}"}[3m])) > 0
@@ -69,7 +72,7 @@ spec:
6972
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
7073
labels:
7174
namespace: {{namespace}}
72-
severity: danger
75+
severity: critical
7376

7477
- alert: Publisering av meldinger feiler
7578
expr: ceil(increase(spring_kafka_template_seconds_count{result="failure", app="{{app}}"}[3m])) > 0
@@ -79,7 +82,7 @@ spec:
7982
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
8083
labels:
8184
namespace: {{namespace}}
82-
severity: danger
85+
severity: critical
8386

8487
- alert: Helsesjekk feiler
8588
expr: floor(increase(http_server_requests_seconds_count{status!~"200", uri="/actuator/health", app="{{app}}"}[3m])) > 0
@@ -89,4 +92,4 @@ spec:
8992
action: "`kubectl logs \{{ $labels.pod }} -n \{{ $labels.namespace }} -c \{{ $labels.app }}`"
9093
labels:
9194
namespace: {{namespace}}
92-
severity: danger
95+
severity: critical

0 commit comments

Comments
 (0)