Skip to content

Commit 54bab3f

Browse files
feat(charts): implement postgres backup/restore on pollux
1 parent 249020d commit 54bab3f

File tree

7 files changed

+291
-0
lines changed

7 files changed

+291
-0
lines changed

charts/workflows-cluster/dev-values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ backup:
88
snapshotDir: /backup/
99
snapshotName: etcd-snapshot
1010
fileExtension: .db
11+
# Postgres backup settings read by the backup-postgres CronJob and the
# restore-postgres Job templates in this chart.
#   enabled       - when false the CronJob is rendered with `suspend: true`
#   snapshotDir   - mount path of the backup volume and directory of the dump
#   snapshotName  - base name of the dump file
#   fileExtension - suffix appended to snapshotName to form the dump file name
postgres:
12+
enabled: false
13+
snapshotDir: /backup/
14+
snapshotName: postgres-snapshot
15+
fileExtension: .pgdump
1116
bucket:
1217
prefix: dev
1318

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bash
#
# Restore the Argo Workflows Postgres database on the currently loaded vcluster.
#
# Steps:
#   1. Verify a kubeconfig is loaded and that its cluster name starts with
#      "vcluster".
#   2. Ask for interactive confirmation.
#   3. Delete any previous restore-postgres Job, render
#      templates/postgres-restore-job.yaml from this chart and apply it to the
#      `workflows` namespace.
#   4. Wait up to 120s for the Job to complete; on failure or timeout print the
#      Job logs and exit 1.

set -euo pipefail

# `${KUBECONFIG:-}` keeps `set -u` from aborting with "unbound variable" before
# the friendly message below can be printed.
if [ -z "${KUBECONFIG:-}" ]; then
    echo "ERROR: Kube config not found - have you loaded a cluster?"
    exit 1
fi

CLUSTER=$(kubectl config view --minify -o jsonpath='{.clusters[0].name}')

if [[ "$CLUSTER" != vcluster* ]]; then
    echo "ERROR: This script must be run inside the VCluster"
    exit 1
fi

# The name shown in the prompt is the second "@"-separated field of the
# kubeconfig cluster name.
CLUSTER_NAME=$(printf '%s\n' "$CLUSTER" | awk -F"@" '{print $2}')
if [[ -z "$CLUSTER_NAME" ]]; then
    # Previously this case fell through silently with exit status 0.
    echo "ERROR: Could not determine the cluster name from '$CLUSTER'"
    exit 1
fi

# Run from the chart root (parent of the directory holding this script).
# dirname also copes with the script being invoked without a path component.
cd "$(dirname "$0")/.."

read -p "WARNING: You are about to attempt a full restore of the Argo Workflows database on $CLUSTER_NAME. Proceed? (y/n)" -r
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborted."
    exit 0
fi

# A Job's pod template is immutable, so remove any earlier run before applying.
kubectl -n workflows delete job restore-postgres --ignore-not-found

# NOTE(review): no -f/--values is passed, so the Job is rendered with the
# chart's default values.yaml - confirm the resulting bucket prefix is the one
# intended for the cluster being restored.
helm template . -s templates/postgres-restore-job.yaml | kubectl -n workflows apply -f -

# Fail early (via set -e) if the Job was not actually created.
kubectl -n workflows get job restore-postgres >/dev/null

if ! kubectl -n workflows wait --for=condition=complete job/restore-postgres --timeout=120s; then
    echo "Restore job failed or timed out. Fetching logs..."
    kubectl -n workflows logs job/restore-postgres --all-containers=true
    exit 1
fi

echo "Restore complete."
32+
33+
34+

charts/workflows-cluster/staging-values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ backup:
77
snapshotDir: /backup/
88
snapshotName: etcd-snapshot
99
fileExtension: .db
10+
# Postgres backup settings read by the backup-postgres CronJob and the
# restore-postgres Job templates in this chart.
#   enabled       - when true the CronJob is scheduled "@daily"
#   snapshotDir   - mount path of the backup volume and directory of the dump
#   snapshotName  - base name of the dump file
#   fileExtension - suffix appended to snapshotName to form the dump file name
postgres:
11+
enabled: true
12+
snapshotDir: /backup/
13+
snapshotName: postgres-snapshot
14+
fileExtension: .pgdump
1015
bucket:
1116
prefix: staging
1217

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Scheduled logical backup of the Argo Workflows Postgres database.
#
# An init container runs pg_dump into a shared emptyDir volume; after it exits
# successfully the rclone container runs /scripts/rclone-upload.sh, which is
# given the dump location through the PREFIX and SNAPSHOT_* variables.
# When backup.postgres.enabled is false the CronJob is still rendered, but
# suspended.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: backup-postgres
spec:
  {{- if $.Values.backup.postgres.enabled }}
  schedule: "@daily"
  {{- else }}
  schedule: "@yearly"
  suspend: true
  {{- end }}
  jobTemplate:
    spec:
      template:
        spec:
          # Declared exactly once for the pod: a second `restartPolicy` key in
          # the same mapping is a duplicate key, which YAML forbids and most
          # parsers resolve silently as last-one-wins.
          restartPolicy: Never
          initContainers:
            - name: create-backup
              image: docker.io/postgres
              command: ["pg_dump"]
              # $(VAR) references are expanded by Kubernetes from the env list
              # below before the process starts; no shell is involved.
              args:
                - -h
                - $(PGHOST)
                - -U
                - $(PGUSER)
                - -d
                - $(DB)
                - --clean
                - --if-exists
                # Never prompt; the password comes from PGPASSWORD.
                - --no-password
                - "--file={{ $.Values.backup.postgres.snapshotDir }}{{ $.Values.backup.postgres.snapshotName }}{{ $.Values.backup.postgres.fileExtension }}"
              env:
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: postgres-argo-workflows-password
                      key: password
                - name: PGHOST
                  value: workflows-postgresql-ha-pgpool-x-workflows-x-workflows-cluster
                - name: PGUSER
                  valueFrom:
                    secretKeyRef:
                      name: postgres-argo-workflows-password
                      key: username
                - name: DB
                  value: argo_workflows
                - name: PGPORT
                  value: "5432"
              volumeMounts:
                - mountPath: {{ $.Values.backup.postgres.snapshotDir | quote }}
                  name: backup
                  readOnly: false
              resources:
                requests:
                  ephemeral-storage: "1Gi"
                limits:
                  ephemeral-storage: "2Gi"
          containers:
            - name: rclone
              image: docker.io/rclone/rclone
              command: ["/bin/sh"]
              args: ["/scripts/rclone-upload.sh"]
              env:
                - name: RCLONE_CONFIG
                  value: /etc/rclone.conf
                - name: PREFIX
                  value: {{ $.Values.backup.bucket.prefix | quote }}
                - name: SNAPSHOT_DIR
                  value: {{ $.Values.backup.postgres.snapshotDir | quote }}
                - name: SNAPSHOT_NAME
                  value: {{ $.Values.backup.postgres.snapshotName | quote }}
                - name: SNAPSHOT_EXT
                  value: {{ $.Values.backup.postgres.fileExtension | quote }}
              volumeMounts:
                # Same emptyDir the init container wrote the dump into.
                - name: backup
                  mountPath: {{ $.Values.backup.postgres.snapshotDir | quote }}
                - name: scripts
                  mountPath: /scripts
                - name: rclone-conf
                  mountPath: /etc/rclone.conf
                  subPath: rclone.conf
              resources:
                requests:
                  ephemeral-storage: "1Gi"
                limits:
                  ephemeral-storage: "2Gi"
          volumes:
            - name: backup
              emptyDir: {}
            - name: scripts
              configMap:
                name: rclone-scripts
            - name: rclone-conf
              secret:
                secretName: etcd-rclone-config
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# One-shot Job that restores the Argo Workflows Postgres database.
#
# The rclone init container runs /scripts/rclone-download.sh with the shared
# `snapshot` emptyDir mounted at /snapshot; the restore container then replays
# /snapshot/snapshot<SNAPSHOT_EXT> with psql against postgresql-0, after
# checking that this host is not in recovery (i.e. is the primary).
apiVersion: batch/v1
kind: Job
metadata:
  name: "restore-postgres"
spec:
  template:
    spec:
      restartPolicy: Never
      volumes:
        - name: scripts
          configMap:
            name: rclone-scripts
            defaultMode: 0755
        - name: snapshot
          emptyDir: {}
        - name: rclone-conf
          secret:
            secretName: etcd-rclone-config
      initContainers:
        - name: rclone
          image: docker.io/rclone/rclone
          command: [/bin/sh, "-c", "/scripts/rclone-download.sh"]
          env:
            - name: RCLONE_CONFIG
              value: /etc/rclone.conf
            - name: PREFIX
              value: {{ $.Values.backup.bucket.prefix | quote }}
            - name: SNAPSHOT_NAME
              value: {{ $.Values.backup.postgres.snapshotName | quote }}
            - name: SNAPSHOT_EXT
              value: {{ $.Values.backup.postgres.fileExtension | quote }}
          volumeMounts:
            - name: scripts
              mountPath: /scripts
            - name: snapshot
              mountPath: /snapshot
            - name: rclone-conf
              mountPath: /etc/rclone.conf
              subPath: rclone.conf
          resources:
            requests:
              ephemeral-storage: "1Gi"
            limits:
              ephemeral-storage: "2Gi"
      containers:
        - name: restore-postgres
          image: docker.io/postgres
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Abort on the first failing command or unset variable so a
              # failed connection or missing dump file fails the Job instead
              # of reporting "Restore complete.".
              set -eu
              host="workflows-postgresql-ha-postgresql-0.workflows-postgresql-ha-postgresql-headless"
              # pg_is_in_recovery() prints "t" on a standby; only restore on
              # the primary.
              if psql -h "$host" -U postgres -tAc "SELECT pg_is_in_recovery()" | grep -q t; then
                echo "This is not the primary host"
                exit 1
              fi
              echo "Beginning restore"
              # Restore against the host that was just verified. The previous
              # "$PRIMARY" was never defined, so psql received an empty -h.
              psql -h "$host" -d "$DB" -U "$PGUSER" -f "/snapshot/snapshot${SNAPSHOT_EXT}"
              echo "Restore complete."
          env:
            - name: PGPASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgres-passwords
                  key: password
            - name: PGUSER
              value: postgres
            - name: DB
              value: argo_workflows
            - name: SNAPSHOT_NAME
              value: {{ $.Values.backup.postgres.snapshotName | quote }}
            - name: SNAPSHOT_EXT
              value: {{ $.Values.backup.postgres.fileExtension | quote }}
          volumeMounts:
            - name: snapshot
              mountPath: /snapshot
76+
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Scratch pod for inspecting a downloaded Postgres dump by hand.
#
# The rclone init container runs /scripts/rclone-download.sh with the shared
# `snapshot` emptyDir mounted at /snapshot; the main container has the same
# volume mounted, carries Postgres connection variables, and simply sleeps.
apiVersion: v1
kind: Pod
metadata:
  name: postgres-test
spec:
  dnsPolicy: ClusterFirst
  volumes:
    - name: scripts
      configMap:
        name: rclone-scripts
        defaultMode: 0755
    - name: snapshot
      emptyDir: {}
    - name: rclone-conf
      secret:
        secretName: etcd-rclone-config
  initContainers:
    - name: rclone
      image: docker.io/rclone/rclone
      command:
        - /bin/sh
        - "-c"
        - /scripts/rclone-download.sh
      env:
        - name: RCLONE_CONFIG
          value: "/etc/rclone.conf"
        - name: PREFIX
          value: "staging"
        - name: SNAPSHOT_NAME
          value: "postgres-snapshot"
        - name: SNAPSHOT_EXT
          value: ".pgdump"
      resources:
        limits:
          ephemeral-storage: "2Gi"
        requests:
          ephemeral-storage: "1Gi"
      volumeMounts:
        - mountPath: /scripts
          name: scripts
        - mountPath: /snapshot
          name: snapshot
        - mountPath: /etc/rclone.conf
          name: rclone-conf
          subPath: rclone.conf
  containers:
    - name: postgres-test
      image: docker.io/postgres
      command:
        - /bin/sh
        - "-c"
      # Keep the container alive so it can be exec'd into.
      args:
        - |
          sleep infinity
      env:
        - name: PGPASSWORD
          valueFrom:
            secretKeyRef:
              key: password
              name: postgres-passwords
        - name: PGUSER
          value: "argo_workflows"
        - name: DB
          value: "argo_workflows"
        - name: PGPORT
          value: "5432"
      resources:
        limits:
          ephemeral-storage: "2Gi"
        requests:
          ephemeral-storage: "1Gi"
      volumeMounts:
        - name: snapshot
          mountPath: "/snapshot/"
          readOnly: false

charts/workflows-cluster/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ backup:
88
snapshotDir: /backup/
99
snapshotName: etcd-snapshot
1010
fileExtension: .db
11+
# Postgres backup settings read by the backup-postgres CronJob and the
# restore-postgres Job templates in this chart.
#   enabled       - when false the CronJob is rendered with `suspend: true`
#   snapshotDir   - mount path of the backup volume and directory of the dump
#   snapshotName  - base name of the dump file
#   fileExtension - suffix appended to snapshotName to form the dump file name
postgres:
12+
enabled: false
13+
snapshotDir: /backup/
14+
snapshotName: postgres-snapshot
15+
fileExtension: .pgdump
1116
bucket:
1217
prefix: prod
1318

0 commit comments

Comments
 (0)