Skip to content

Commit bfb658d

Browse files
author
Ruben L. Mendoza
authored
Changeset replication container (#299)
* Add changeset replication container * Add charts template to deploy the changeset-replication container * Update start files for changeset replication
1 parent 79a51f1 commit bfb658d

File tree

8 files changed

+213
-13
lines changed

8 files changed

+213
-13
lines changed

.dockerignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,5 @@ data/
4141
# ignore all markdown files (md) beside all README*.md
4242
*.md
4343
!README*.md
44-
44+
changeset-replication-job/config.yaml
4545
envs/

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ tiler-server/imposm/
3535
overpass-api-db/
3636
data/*/
3737
!data/README.md
38+
images/changeset-replication-job/config.yaml

chartpress.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,6 @@ charts:
3939
taginfo:
4040
valuesPath: taginfo.image
4141
osm-simple-metrics:
42-
valuesPath: osmSimpleMetrics.image
42+
valuesPath: osmSimpleMetrics.image
43+
changeset-replication-job:
44+
valuesPath: changesetReplicationJob.image

compose/replication.yml

+19-11
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,24 @@ services:
99
context: ../images/replication-job
1010
dockerfile: Dockerfile
1111
volumes:
12-
- ../data/replication-job-data:/mnt/data
12+
- ../data/replication-job-data:/mnt/data
1313
command: >
14-
/bin/bash -c "
15-
echo Sleep the replication-job for 1 minute;
16-
sleep 1m;
17-
echo Creating the replication files!;
18-
/start.sh
19-
"
14+
/bin/bash -c " echo Sleep the replication-job for 1 minute; sleep 1m; echo Creating the replication files!; /start.sh "
2015
env_file:
21-
- ../envs/.env.db
22-
- ../envs/.env.db-utils
23-
- ../envs/.env.cloudprovider
24-
16+
- ../envs/.env.db
17+
- ../envs/.env.db-utils
18+
- ../envs/.env.cloudprovider
19+
# Compose service that builds and runs the changeset replication image.
changeset-replication-job:
20+
image: osmseed-changeset-replication-job:v1
21+
build:
22+
context: ../images/changeset-replication-job
23+
dockerfile: Dockerfile
24+
volumes:
25+
# Host data directory mounted at /mnt/changesets inside the container.
- ../data/changeset-replication-job-data:/mnt/changesets
26+
# - ./../images/changeset-replication-job:/openstreetmap-changeset-replication
27+
# Overrides the image's default command and runs start.sh through bash,
# resolved relative to the container's working directory.
command: >
28+
/bin/bash -c "./start.sh"
29+
# Environment variables are loaded from these files.
env_file:
30+
- ../envs/.env.db
31+
- ../envs/.env.db-utils
32+
- ../envs/.env.cloudprovider
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Image for the changeset replication job: the
# openstreetmap-changeset-replication Ruby script plus the AWS CLI used by
# start.sh to sync replication files.
FROM ruby:2.4

# Fetch the replication script; /app is the working directory from here on.
RUN git clone https://github.com/zerebubuth/openstreetmap-changeset-replication.git /app
WORKDIR /app

# NOTE(review): no `apt-get update` runs before this install, so it presumably
# relies on what the base image already provides — confirm before adding
# further packages here.
RUN apt-get install -y curl unzip

# Install AWS CLI v2, then remove the downloaded archive and the extracted
# installer so they are not left behind inside /app.
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
    && unzip awscliv2.zip \
    && ./aws/install \
    && rm -rf awscliv2.zip aws

# TODO: Install Google Cloud Platform (GCP) and Azure CLI for better data management
RUN gem install pg -v 0.21.0
RUN gem install libxml-ruby -v 3.1.0

COPY start.sh .

# start.sh is executed directly by CMD, so it needs the executable bit as well
# as the replication script.
RUN chmod +x start.sh replicate_changesets.rb

# Exec form: start.sh is launched directly instead of through `/bin/sh -c`.
CMD ["/app/start.sh"]
+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env bash
# Preamble of the changeset replication entry point.
#
# 1. Writes /config.yaml from the POSTGRES_* environment variables.
# 2. Makes sure $workingDirectory/state.yaml exists: if it is missing locally
#    it is fetched from the storage selected by CLOUDPROVIDER (aws, gcp or
#    azure), and if it still cannot be found a fresh "sequence: 0" file is
#    created.
set -e

workingDirectory="/mnt/changesets"
CHANGESETS_REPLICATION_FOLDER="replication/changesets"
mkdir -p "$workingDirectory"

# Creating config file
cat >/config.yaml <<EOF
state_file: $workingDirectory/state.yaml
db: host=$POSTGRES_HOST dbname=$POSTGRES_DB user=$POSTGRES_USER password=$POSTGRES_PASSWORD
data_dir: $workingDirectory/
EOF

# Local state file and its object name inside the bucket/container.
stateFile="$workingDirectory/state.yaml"
remoteState="$CHANGESETS_REPLICATION_FOLDER/state.yaml"

# Look for state.yaml locally first, then in the configured cloud provider.
if [[ ! -f "$stateFile" ]]; then
  echo "File $stateFile does not exist in local storage"

  case "$CLOUDPROVIDER" in
    aws)
      # A failing `aws s3 ls` (for whatever reason) is treated as "no remote state".
      if aws s3 ls "$AWS_S3_BUCKET/$remoteState" >/dev/null 2>&1; then
        echo "File exists, downloading from AWS - $AWS_S3_BUCKET"
        aws s3 cp "$AWS_S3_BUCKET/$remoteState" "$stateFile"
      fi
      ;;
    gcp)
      if gsutil -q stat "$GCP_STORAGE_BUCKET/$remoteState"; then
        echo "File exists, downloading from GCP - $GCP_STORAGE_BUCKET"
        gsutil cp "$GCP_STORAGE_BUCKET/$remoteState" "$stateFile"
      fi
      ;;
    azure)
      azure_blob_exists=$(az storage blob exists --container-name "$AZURE_CONTAINER_NAME" --name "$remoteState" --query "exists" --output tsv)
      if [[ "$azure_blob_exists" == "true" ]]; then
        echo "File exists, downloading from Azure - $AZURE_CONTAINER_NAME"
        az storage blob download --container-name "$AZURE_CONTAINER_NAME" --name "$remoteState" --file "$stateFile"
      fi
      ;;
  esac

  # Nothing was downloaded: start replication from sequence 0.
  if [[ ! -f "$stateFile" ]]; then
    echo "sequence: 0" >"$stateFile"
  fi
fi
38+
39+
# Creating the replication files
40+
#######################################
# Run the changeset replication loop forever.
#
# Each pass runs replicate_changesets.rb against /config.yaml, uploads every
# regular file under $workingDirectory whose status changed within the last
# minute (find -cmin -1) to the storage selected by CLOUDPROVIDER, and then
# sleeps for 60 seconds.
#
# Globals (read):
#   workingDirectory, CHANGESETS_REPLICATION_FOLDER, CLOUDPROVIDER,
#   AWS_S3_BUCKET, GCP_STORAGE_BUCKET, AZURE_CONTAINER_NAME
# Arguments:
#   None
# Outputs:
#   One log line per uploaded file on stdout.
# Returns:
#   Does not return; the loop has no exit condition.
#######################################
generateReplication() {
  local local_file cloud_file

  while true; do
    # Run replication script
    ruby replicate_changesets.rb /config.yaml

    # Loop through newly created files. Paths arrive NUL-delimited on fd 3 so
    # names containing whitespace or glob characters are kept intact, and the
    # upload commands keep their own stdin.
    while IFS= read -r -d '' local_file <&3; do
      # find also reports directories; only regular files are uploaded.
      [[ -f "$local_file" ]] || continue

      # Construct the cloud path for the file: strip the literal working
      # directory prefix (quoted so it is not interpreted as a pattern).
      cloud_file="$CHANGESETS_REPLICATION_FOLDER/${local_file#"$workingDirectory"/}"

      # Log file transfer
      echo "$(date +%F_%H:%M:%S): Copying file $local_file to $cloud_file"

      # Handle different cloud providers
      case "$CLOUDPROVIDER" in
        "aws")
          aws s3 cp "$local_file" "$AWS_S3_BUCKET/$cloud_file" --acl public-read
          ;;
        "gcp")
          gsutil cp -a public-read "$local_file" "$GCP_STORAGE_BUCKET/$cloud_file"
          ;;
        "azure")
          az storage blob upload \
            --container-name "$AZURE_CONTAINER_NAME" \
            --file "$local_file" \
            --name "$cloud_file" \
            --output none
          ;;
        *)
          echo "Unknown cloud provider: $CLOUDPROVIDER"
          ;;
      esac
    done 3< <(find "$workingDirectory/" -cmin -1 -print0)

    # Sleep for 60 seconds before next iteration
    sleep 60s
  done
}
80+
81+
# Call the function to start the replication process
82+
# generateReplication loops in `while true`, so this call is the last
# statement of the script that is ever reached.
generateReplication
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
{{- /*
Deployment for the changeset-replication-job container.
Rendered only when .Values.changesetReplicationJob.enabled is true. The pod
receives the database connection settings and the cloud-provider settings
as environment variables.
*/ -}}
{{- if .Values.changesetReplicationJob.enabled -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}-changeset-replication-job
  labels:
    app: {{ template "osm-seed.name" . }}
    component: changeset-replication-job
    environment: {{ .Values.environment }}
    release: {{ .Release.Name }}
spec:
  replicas: 1
  selector:
    # The selector includes release and component in addition to app, so this
    # Deployment only selects its own pods and not those of other workloads
    # that carry the same app label.
    matchLabels:
      app: {{ template "osm-seed.name" . }}
      release: {{ .Release.Name }}
      component: changeset-replication-job
  template:
    metadata:
      labels:
        app: {{ template "osm-seed.name" . }}
        release: {{ .Release.Name }}
        component: changeset-replication-job
    spec:
      containers:
        - name: {{ .Release.Name }}-changeset-replication-job-deployment
          image: {{ .Values.changesetReplicationJob.image.name }}:{{ .Values.changesetReplicationJob.image.tag }}
          # command: ['/start.sh']
          {{- if .Values.changesetReplicationJob.resources.enabled }}
          resources:
            requests:
              memory: {{ .Values.changesetReplicationJob.resources.requests.memory }}
              cpu: {{ .Values.changesetReplicationJob.resources.requests.cpu }}
            limits:
              memory: {{ .Values.changesetReplicationJob.resources.limits.memory }}
              cpu: {{ .Values.changesetReplicationJob.resources.limits.cpu }}
          {{- end }}
          env:
            # Database connection; values are quoted so they always render as
            # YAML strings.
            - name: POSTGRES_HOST
              value: {{ .Release.Name }}-db
            - name: POSTGRES_DB
              value: {{ .Values.db.env.POSTGRES_DB | quote }}
            - name: POSTGRES_PASSWORD
              value: {{ quote .Values.db.env.POSTGRES_PASSWORD }}
            - name: POSTGRES_USER
              value: {{ .Values.db.env.POSTGRES_USER | quote }}
            - name: REPLICATION_FOLDER
              value: replication/minute
            - name: CLOUDPROVIDER
              value: {{ .Values.cloudProvider | quote }}
            # In case cloudProvider=aws
            {{- if eq .Values.cloudProvider "aws" }}
            - name: AWS_S3_BUCKET
              value: {{ .Values.AWS_S3_BUCKET | quote }}
            {{- end }}
            # In case cloudProvider=gcp
            {{- if eq .Values.cloudProvider "gcp" }}
            - name: GCP_STORAGE_BUCKET
              value: {{ .Values.GCP_STORAGE_BUCKET | quote }}
            {{- end }}
            # In case cloudProvider=azure
            {{- if eq .Values.cloudProvider "azure" }}
            - name: AZURE_STORAGE_ACCOUNT
              value: {{ .Values.AZURE_STORAGE_ACCOUNT | quote }}
            - name: AZURE_CONTAINER_NAME
              value: {{ .Values.AZURE_CONTAINER_NAME | quote }}
            - name: AZURE_STORAGE_CONNECTION_STRING
              value: {{ .Values.AZURE_STORAGE_CONNECTION_STRING | quote }}
            {{- end }}
            # Memory optimization for osmosis
            # NOTE(review): presumably carried over from an osmosis-based job
            # template — confirm this container actually reads it.
            {{- if .Values.changesetReplicationJob.resources.enabled }}
            - name: MEMORY_JAVACMD_OPTIONS
              value: {{ .Values.changesetReplicationJob.resources.requests.memory | default "2Gi" | quote }}
            {{- end }}
      {{- if .Values.changesetReplicationJob.nodeSelector.enabled }}
      nodeSelector:
        {{ .Values.changesetReplicationJob.nodeSelector.label_key }} : {{ .Values.changesetReplicationJob.nodeSelector.label_value }}
      {{- end }}
{{- end }}

osm-seed/values.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -688,3 +688,22 @@ monitoringReplication:
688688
cpu: '2'
689689
nodeSelector:
690690
enabled: false
691+
692+
# ====================================================================================================
693+
# Variables for changeset-replication-job: configuration to create the changeset replication files by minute, hour, or day
694+
# ====================================================================================================
695+
# Settings read by the changeset-replication-job Deployment template.
changesetReplicationJob:
696+
# The Deployment is rendered only when this is true.
enabled: false
697+
# Container image reference, rendered by the template as <name>:<tag>.
image:
698+
name: ''
699+
tag: ''
700+
# When enabled is true, the requests/limits below are applied to the container.
resources:
701+
enabled: false
702+
requests:
703+
memory: '20Gi'
704+
cpu: '8'
705+
limits:
706+
memory: '24Gi'
707+
cpu: '10'
708+
# NOTE(review): when enabled is true the template also reads
# nodeSelector.label_key and nodeSelector.label_value; neither has a default
# in this block.
nodeSelector:
709+
enabled: false

0 commit comments

Comments
 (0)