From 558049a62311b267d0aa163d9d3ddc4d3f4e1771 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Mon, 12 May 2025 15:57:55 +0200 Subject: [PATCH 1/9] adds first version of playbook --- galaxy_db_recovery.yaml | 105 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 galaxy_db_recovery.yaml diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml new file mode 100644 index 0000000..ac02929 --- /dev/null +++ b/galaxy_db_recovery.yaml @@ -0,0 +1,105 @@ +--- +- hosts: galaxy-qa1.galaxy.cloud.e-infra.cz + become: true + become_user: root + vars: + galaxy_db_restore_version: 20250505T010001Z + tasks: + - name: Install production-specific dependencies + package: + name: ['rsync'] + +# recover from WAL based on https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration + ## Part responsible for recovering Galaxy DB from backup if exists (it should be in post_tasks of dbservers but NDB access needs to be set up first) + - name: Register psql data directory + ansible.builtin.shell: + cmd: psql -Atc 'show data_directory;' + register: psql_data_dir + become: true + become_user: postgres + +## Unsucessfull attempt to make a prompt with the list of all available backup versions +# - name: Gather Galaxy DB backup versions +# find: +# paths: "{{ postgresql_backup_dir }}" +# file_type: directory +# # You can also use file_type: 'file' for files only, or 'directory' for directories only +# register: db_versions +# become: true +# become_user: postgres +# - name: Set backup versions as a list +# set_fact: +# item_choices: "{{ db_versions.files | map(attribute='path') | list }}" +# - name: Select Galaxy DB backup version from a list +# vars_prompt: +# - name: "selected_version" +# prompt: "Select Galaxy DB version to restore" +# private: no +# choices: "{{ item_choices }}" +# - name: Print selected Galaxy DB backup versions +# debug: +# msg: "You selected {{ selected_version }} to restore" + + - name: Print PostgreSQL data directory path + debug: + msg: "PostgreSQL data directory path: {{ psql_data_dir.stdout }}" + - name: Print Warning + debug: + msg: "No PostgreSQL data directory path! PSQL DB restore is not possible!" + when: psql_data_dir.stdout == '' + + - name: Restore Galaxy DB backup - pick proper backup version + ansible.builtin.shell: + cmd: "if [ '{{ galaxy_db_restore_version }}' = 'latest' ]; then ls -dt {{ postgresql_backup_dir }}/20*Z | head -1; else ls -d {{ postgresql_backup_dir }}/{{ galaxy_db_restore_version }}; fi " + register: galaxy_db_restore_dir + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - print selected version + debug: + msg: "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}" + + - name: Restore Galaxy DB backup - stop postgresql + ansible.builtin.systemd: + name: postgresql + state: stopped + become: true + + - name: Restore Galaxy DB backup - backup current postgresql + ansible.builtin.shell: + cmd: "mv {{ psql_data_dir.stdout }} {{ psql_data_dir.stdout+'.backup_'+ansible_date_time.iso8601 }}" + ignore_errors: true + when: psql_data_dir.stdout != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - copy proper backup version + ansible.builtin.shell: + cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0750 {{ psql_data_dir.stdout }} " + when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "restore_command = 'cp \"{{ postgresql_backup_dir }}/wal_archive/%f\" \"%p\"'" + state: present + create: true + when: psql_data_dir.stdout != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - touch the signal file + ansible.builtin.shell: + cmd: "touch {{ psql_data_dir.stdout }}/recovery.signal " + when: psql_data_dir.stdout != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - start postgresql + ansible.builtin.systemd: + name: postgresql + state: started + become: true From 552ba6404ba298c1edd8f97fab7f37a2bf9fc167 Mon Sep 17 00:00:00 2001 From: martindemko <325073@mail.muni.cz> Date: Tue, 13 May 2025 23:50:54 +0200 Subject: [PATCH 2/9] fixes permissions of psql directory --- galaxy_db_recovery.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml index ac02929..f58db29 100644 --- a/galaxy_db_recovery.yaml +++ b/galaxy_db_recovery.yaml @@ -75,7 +75,7 @@ - name: Restore Galaxy DB backup - copy proper backup version ansible.builtin.shell: - cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0750 {{ psql_data_dir.stdout }} " + cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0700 {{ psql_data_dir.stdout }} " when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' become: true become_user: postgres From 80ef5d2d3cc3d0e007793dbcb8d72492aaacdcd0 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Fri, 16 May 2025 13:55:37 +0200 Subject: [PATCH 3/9] changes psql recovery version to latest - default --- galaxy_db_recovery.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml index f58db29..9c2761a 100644 --- a/galaxy_db_recovery.yaml +++ b/galaxy_db_recovery.yaml @@ -3,7 +3,7 @@ become: true become_user: root vars: - galaxy_db_restore_version: 20250505T010001Z + galaxy_db_restore_version: latest tasks: - name: Install production-specific dependencies package: From e8df8ae6713618b444e1259ac9187abcda33e1a3 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Thu, 5 Jun 2025 16:07:24 +0200 Subject: [PATCH 4/9] latest version of galaxy_db-recovery playbook --- galaxy_db_recovery.yaml | 99 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 7 deletions(-) diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml index 9c2761a..a2b8823 100644 --- a/galaxy_db_recovery.yaml +++ b/galaxy_db_recovery.yaml @@ -1,16 +1,43 @@ --- -- hosts: galaxy-qa1.galaxy.cloud.e-infra.cz +- hosts: all + name: apt update, python, pip become: true become_user: root - vars: - galaxy_db_restore_version: latest tasks: - - name: Install production-specific dependencies - package: - name: ['rsync'] + - ansible.builtin.apt: + name: + - python3-pip + - python-is-python3 + update_cache: yes + when: ansible_os_family == 'Debian' +- hosts: dbservers + become: true + become_user: root + vars: + galaxy_db_restore_version: '20250529T010001Z' # Example: '20250529T010001Z'; 'latest' + galaxy_db_restore_timestamp: '2025-05-29 02:30:00' # Example: '2025-05-29 02:30:00' + galaxy_db_restore_timeline: 'current' # could be 'latest' (default) or 'current', which recovers along the same timeline that was current when the base backup was taken. Or using '0xID' for specific numeric timeline ID (hexadecimal number used in WAL file name) + galaxy_db_restore_action: 'promote' # could be 'promote' (to continue right after recovery) or 'pause' (by default) to manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user + pre_tasks: + - name: Install Dependencies + package: + name: ['acl', 'anacron', 'rsync'] + roles: + - galaxyproject.postgresql + - role: galaxyproject.postgresql_objects + become: true + become_user: postgres + post_tasks: # recover from WAL based on https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration - ## Part responsible for recovering Galaxy DB from backup if exists (it should be in post_tasks of dbservers but NDB access needs to be set up first) + ## Part responsible for recovering Galaxy DB from backup if exists (it should be in post_tasks of dbservers but RDB (or NDB) access needs to be set up first) + - name: Register if galaxy exists + ansible.builtin.shell: + cmd: galaxyctl status + register: galaxyctl_status + ignore_errors: true + become: true + - name: Register psql data directory ansible.builtin.shell: cmd: psql -Atc 'show data_directory;' @@ -59,6 +86,12 @@ debug: msg: "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}" + - name: Restore Galaxy DB backup - stop Galaxy + ansible.builtin.shell: + cmd: galaxyctl stop + become: true + when: galaxyctl_status.rc == 0 + - name: Restore Galaxy DB backup - stop postgresql ansible.builtin.systemd: name: postgresql @@ -91,6 +124,39 @@ become: true become_user: postgres + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timestamp of recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_time = '{{ galaxy_db_restore_timestamp }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_timestamp != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timeline of recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_timeline = '{{ galaxy_db_restore_timeline }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_timeline != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific action after recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_action = '{{ galaxy_db_restore_action }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != '' + become: true + become_user: postgres + - name: Restore Galaxy DB backup - touch the signal file ansible.builtin.shell: cmd: "touch {{ psql_data_dir.stdout }}/recovery.signal " @@ -103,3 +169,22 @@ name: postgresql state: started become: true + + - name: Register if postgresql log exists + ansible.builtin.shell: + cmd: "ls /var/log/postgresql/postgresql-{{ postgresql_default_version }}-main.log" + register: psql_log + ignore_errors: true + become: true + + - name: Restore Galaxy DB backup - touch the signal file + ansible.builtin.shell: + cmd: "tail -20 {{ psql_log.stdout }}" + when: psql_data_dir.stdout != '' and psql_log.rc == 0 + become: true + + - name: Restore Galaxy DB backup - start Galaxy + ansible.builtin.shell: + cmd: galaxyctl start + become: true + when: galaxyctl_status.rc == 0 From 5d6c3119b519df0afee96b4ce4faac8ebff658ba Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Mon, 9 Jun 2025 15:51:29 +0200 Subject: [PATCH 5/9] adds test playbook (has to be removed eventualy) --- test_prompt.yaml | 185 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 test_prompt.yaml diff --git a/test_prompt.yaml b/test_prompt.yaml new file mode 100644 index 0000000..116363f --- /dev/null +++ b/test_prompt.yaml @@ -0,0 +1,185 @@ +--- +- hosts: galaxy-qa1.galaxy.cloud.e-infra.cz + become: true + become_user: root + vars: + galaxy_db_restore_version: '20250529T010001Z' # Example: '20250529T010001Z'; 'latest' + galaxy_db_restore_timestamp: '2025-05-29 02:30:00' # Example: '2025-05-29 02:30:00' + galaxy_db_restore_timeline: 'current' # could be 'latest' (default) or 'current', which recovers along the same timeline that was current when the base backup was taken. Or using '0xID' for specific numeric timeline ID (hexadecimal number used in WAL file name) + galaxy_db_restore_action: 'promote' # could be 'promote' (to continue right after recovery) or 'pause' (by default) to manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user +# pre_tasks: +# - name: Install Dependencies +# package: +# name: ['acl', 'anacron', 'rsync'] +# roles: +# - galaxyproject.postgresql +# - role: galaxyproject.postgresql_objects +# become: true +# become_user: postgres +# post_tasks: +# - name: Register if galaxy exists +# ansible.builtin.shell: +# cmd: galaxyctl status +# register: galaxyctl_status +# ignore_errors: true +# become: true + + tasks: + - name: Register psql data directory + ansible.builtin.shell: + cmd: psql -Atc 'show data_directory;' + register: psql_data_dir + become: true + become_user: postgres + +# Unsucessfull attempt to make a prompt with the list of all available backup versions + - name: Gather Galaxy DB backup versions + find: + paths: "{{ postgresql_backup_dir }}" + file_type: directory + patterns: "*Z" + # You can also use file_type: 'file' for files only, or 'directory' for directories only + register: db_versions + become: true + become_user: postgres + + - name: Set backup versions as a list + set_fact: + version_list: "{{ db_versions.files | map(attribute='path') | list }}" + + - name: Select Galaxy DB backup version from a list + debug: + msg: + - "Select database version to restore:" + - "{{ db_versions.files | map(attribute='path') | map('basename') }}" + + - name: Wait for selection + pause: + prompt: "Select version to restore from the list: {{ db_versions.files | map(attribute='path') | map('basename') }}" + register: selected_version + + - name: Print selected Galaxy DB backup versions + debug: + msg: "You selected {{ selected_version.user_input }} to restore" + +# - name: Print PostgreSQL data directory path +# debug: +# msg: "PostgreSQL data directory path: {{ psql_data_dir.stdout }}" +# - name: Print Warning +# debug: +# msg: "No PostgreSQL data directory path! PSQL DB restore is not possible!" +# when: psql_data_dir.stdout == '' +# +# - name: Restore Galaxy DB backup - pick proper backup version +# ansible.builtin.shell: +# cmd: "if [ '{{ galaxy_db_restore_version }}' = 'latest' ]; then ls -dt {{ postgresql_backup_dir }}/20*Z | head -1; else ls -d {{ postgresql_backup_dir }}/{{ galaxy_db_restore_version }}; fi " +# register: galaxy_db_restore_dir +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - print selected version +# debug: +# msg: "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}" +# +# - name: Restore Galaxy DB backup - stop Galaxy +# ansible.builtin.shell: +# cmd: galaxyctl stop +# become: true +# when: galaxyctl_status.rc == 0 +# +# - name: Restore Galaxy DB backup - stop postgresql +# ansible.builtin.systemd: +# name: postgresql +# state: stopped +# become: true +# +# - name: Restore Galaxy DB backup - backup current postgresql +# ansible.builtin.shell: +# cmd: "mv {{ psql_data_dir.stdout }} {{ psql_data_dir.stdout+'.backup_'+ansible_date_time.iso8601 }}" +# ignore_errors: true +# when: psql_data_dir.stdout != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - copy proper backup version +# ansible.builtin.shell: +# cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0700 {{ psql_data_dir.stdout }} " +# when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - fill postgresql.auto.conf +# ansible.builtin.lineinfile: +# path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" +# insertafter: EOF +# line: "restore_command = 'cp \"{{ postgresql_backup_dir }}/wal_archive/%f\" \"%p\"'" +# state: present +# create: true +# when: psql_data_dir.stdout != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timestamp of recovery) +# ansible.builtin.lineinfile: +# path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" +# insertafter: EOF +# line: "recovery_target_time = '{{ galaxy_db_restore_timestamp }}'" +# state: present +# create: true +# when: psql_data_dir.stdout != '' and galaxy_db_restore_timestamp != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timeline of recovery) +# ansible.builtin.lineinfile: +# path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" +# insertafter: EOF +# line: "recovery_target_timeline = '{{ galaxy_db_restore_timeline }}'" +# state: present +# create: true +# when: psql_data_dir.stdout != '' and galaxy_db_restore_timeline != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific action after recovery) +# ansible.builtin.lineinfile: +# path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" +# insertafter: EOF +# line: "recovery_target_action = '{{ galaxy_db_restore_action }}'" +# state: present +# create: true +# when: psql_data_dir.stdout != '' and galaxy_db_restore_action != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - touch the signal file +# ansible.builtin.shell: +# cmd: "touch {{ psql_data_dir.stdout }}/recovery.signal " +# when: psql_data_dir.stdout != '' +# become: true +# become_user: postgres +# +# - name: Restore Galaxy DB backup - start postgresql +# ansible.builtin.systemd: +# name: postgresql +# state: started +# become: true +# +# - name: Register if postgresql log exists +# ansible.builtin.shell: +# cmd: "ls /var/log/postgresql/postgresql-{{ postgresql_default_version }}-main.log" +# register: psql_log +# ignore_errors: true +# become: true +# +# - name: Restore Galaxy DB backup - touch the signal file +# ansible.builtin.shell: +# cmd: "tail -20 {{ psql_log.stdout }}" +# when: psql_data_dir.stdout != '' and psql_log.rc == 0 +# become: true +# +# - name: Restore Galaxy DB backup - start Galaxy +# ansible.builtin.shell: +# cmd: galaxyctl start +# become: true +# when: galaxyctl_status.rc == 0 From f0402df35bbc0144d0ee43d80dd601426ed9e700 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Mon, 23 Jun 2025 16:25:11 +0200 Subject: [PATCH 6/9] adds new role to restore psql database into playbook --- galaxy.yml | 1 + galaxy_db_recovery.yaml | 18 +- .../defaults/main.yml | 6 + .../tasks/main.yml | 185 ++++++++++++++++++ 4 files changed, 207 insertions(+), 3 deletions(-) create mode 100644 roles/metacentrum.postgresql_restore/defaults/main.yml create mode 100644 roles/metacentrum.postgresql_restore/tasks/main.yml diff --git a/galaxy.yml b/galaxy.yml index faabdde..5abff1a 100644 --- a/galaxy.yml +++ b/galaxy.yml @@ -99,6 +99,7 @@ - role: galaxyproject.postgresql_objects become: true become_user: postgres + - role: metacentrum.postgresql_restore - hosts: noletsencrypt become: true diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml index a2b8823..76ee2fc 100644 --- a/galaxy_db_recovery.yaml +++ b/galaxy_db_recovery.yaml @@ -16,7 +16,7 @@ become_user: root vars: galaxy_db_restore_version: '20250529T010001Z' # Example: '20250529T010001Z'; 'latest' - galaxy_db_restore_timestamp: '2025-05-29 02:30:00' # Example: '2025-05-29 02:30:00' + galaxy_db_restore_timestamp: '2025-05-29 02:30:00' # The point of return must have happened after DB base backup creation (version timestamp). Example: '2025-05-29 02:30:00' galaxy_db_restore_timeline: 'current' # could be 'latest' (default) or 'current', which recovers along the same timeline that was current when the base backup was taken. Or using '0xID' for specific numeric timeline ID (hexadecimal number used in WAL file name) galaxy_db_restore_action: 'promote' # could be 'promote' (to continue right after recovery) or 'pause' (by default) to manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user pre_tasks: @@ -170,19 +170,31 @@ state: started become: true - - name: Register if postgresql log exists + - name: Restore Galaxy DB backup - register if postgresql log exists ansible.builtin.shell: cmd: "ls /var/log/postgresql/postgresql-{{ postgresql_default_version }}-main.log" register: psql_log ignore_errors: true become: true - - name: Restore Galaxy DB backup - touch the signal file + - name: Restore Galaxy DB backup - show the PSQL log tail ansible.builtin.shell: cmd: "tail -20 {{ psql_log.stdout }}" when: psql_data_dir.stdout != '' and psql_log.rc == 0 become: true + - name: Restore Galaxy DB backup - wait to read the PSQL log + ansible.builtin.pause: + seconds: 10 + when: psql_data_dir.stdout != '' and galaxy_db_restore_action == 'promote' + become: true + + - name: Restore Galaxy DB backup - wait for manual check of PSQL state + ansible.builtin.pause: + prompt: Please, manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user on the server. + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != 'promote' + become: true + - name: Restore Galaxy DB backup - start Galaxy ansible.builtin.shell: cmd: galaxyctl start diff --git a/roles/metacentrum.postgresql_restore/defaults/main.yml b/roles/metacentrum.postgresql_restore/defaults/main.yml new file mode 100644 index 0000000..0eea75b --- /dev/null +++ b/roles/metacentrum.postgresql_restore/defaults/main.yml @@ -0,0 +1,6 @@ +# If galaxy_db_restore_version will be an empty string, no database restore will take place +galaxy_db_restore_version: '' # Example: '20250529T010001Z'; 'latest' +# The point of return must have happened after DB base backup creation (version timestamp) +galaxy_db_restore_timestamp: '' # Example: '2025-05-29 02:30:00' +galaxy_db_restore_timeline: 'latest' # could be 'latest' (default) or 'current', which recovers along the same timeline that was current when the base backup was taken. Or using '0xID' for specific numeric timeline ID (hexadecimal number used in WAL file name) +galaxy_db_restore_action: 'pause' # could be 'promote' (to continue right after recovery) or 'pause' (by default) to manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user diff --git a/roles/metacentrum.postgresql_restore/tasks/main.yml b/roles/metacentrum.postgresql_restore/tasks/main.yml new file mode 100644 index 0000000..e4c20d0 --- /dev/null +++ b/roles/metacentrum.postgresql_restore/tasks/main.yml @@ -0,0 +1,185 @@ + - name: Install Dependencies + package: + name: ['rsync'] + become: true + when: galaxy_db_restore_version != '' + +# recover from WAL based on https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration + - name: Register if galaxy exists + ansible.builtin.shell: + cmd: galaxyctl status + register: galaxyctl_status + ignore_errors: true + become: true + when: galaxy_db_restore_version != '' + + - name: Register psql data directory + ansible.builtin.shell: + cmd: psql -Atc 'show data_directory;' + register: psql_data_dir + become: true + become_user: postgres + when: galaxy_db_restore_version != '' + +## Unsucessfull attempt to make a prompt with the list of all available backup versions +# - name: Gather Galaxy DB backup versions +# find: +# paths: "{{ postgresql_backup_dir }}" +# file_type: directory +# # You can also use file_type: 'file' for files only, or 'directory' for directories only +# register: db_versions +# become: true +# become_user: postgres +# - name: Set backup versions as a list +# set_fact: +# item_choices: "{{ db_versions.files | map(attribute='path') | list }}" +# - name: Select Galaxy DB backup version from a list +# vars_prompt: +# - name: "selected_version" +# prompt: "Select Galaxy DB version to restore" +# private: no +# choices: "{{ item_choices }}" +# - name: Print selected Galaxy DB backup versions +# debug: +# msg: "You selected {{ selected_version }} to restore" + + - name: Print PostgreSQL data directory path + debug: + msg: "PostgreSQL data directory path: {{ psql_data_dir.stdout }}" + when: galaxy_db_restore_version != '' + + - name: Print Warning + debug: + msg: "No PostgreSQL data directory path! PSQL DB restore is not possible!" + when: psql_data_dir.stdout == '' and galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - pick proper backup version + ansible.builtin.shell: + cmd: "if [ '{{ galaxy_db_restore_version }}' = 'latest' ]; then ls -dt {{ postgresql_backup_dir }}/20*Z | head -1; else ls -d {{ postgresql_backup_dir }}/{{ galaxy_db_restore_version }}; fi " + register: galaxy_db_restore_dir + become: true + become_user: postgres + when: galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - print selected version + debug: + msg: "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}" + when: galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - stop Galaxy + ansible.builtin.shell: + cmd: galaxyctl stop + become: true + when: galaxyctl_status.rc == 0 and galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - stop postgresql + ansible.builtin.systemd: + name: postgresql + state: stopped + become: true + when: galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - backup current postgresql + ansible.builtin.shell: + cmd: "mv {{ psql_data_dir.stdout }} {{ psql_data_dir.stdout+'.backup_'+ansible_date_time.iso8601 }}" + ignore_errors: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - copy proper backup version + ansible.builtin.shell: + cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0700 {{ psql_data_dir.stdout }} " + when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "restore_command = 'cp \"{{ postgresql_backup_dir }}/wal_archive/%f\" \"%p\"'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timestamp of recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_time = '{{ galaxy_db_restore_timestamp }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_timestamp != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific timeline of recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_timeline = '{{ galaxy_db_restore_timeline }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_timeline != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - fill postgresql.auto.conf (using specific action after recovery) + ansible.builtin.lineinfile: + path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf" + insertafter: EOF + line: "recovery_target_action = '{{ galaxy_db_restore_action }}'" + state: present + create: true + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - touch the signal file + ansible.builtin.shell: + cmd: "touch {{ psql_data_dir.stdout }}/recovery.signal " + when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + become: true + become_user: postgres + + - name: Restore Galaxy DB backup - start postgresql + ansible.builtin.systemd: + name: postgresql + state: started + become: true + when: galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - register if postgresql log exists + ansible.builtin.shell: + cmd: "ls /var/log/postgresql/postgresql-{{ postgresql_default_version }}-main.log" + register: psql_log + ignore_errors: true + become: true + when: galaxy_db_restore_version != '' + + - name: Restore Galaxy DB backup - show the PSQL log tail + ansible.builtin.shell: + cmd: "tail -20 {{ psql_log.stdout }}" + when: psql_data_dir.stdout != '' and psql_log.rc == 0 and galaxy_db_restore_version != '' + become: true + + - name: Restore Galaxy DB backup - wait to read the PSQL log + ansible.builtin.pause: + seconds: 10 + when: psql_data_dir.stdout != '' and galaxy_db_restore_action == 'promote' and galaxy_db_restore_version != '' + become: true + + - name: Restore Galaxy DB backup - wait for manual check of PSQL state + ansible.builtin.pause: + prompt: Please, manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user on the server. + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != 'promote' and galaxy_db_restore_version != '' + become: true + + - name: Restore Galaxy DB backup - start Galaxy + ansible.builtin.shell: + cmd: galaxyctl start + become: true + when: galaxyctl_status.rc == 0 and galaxy_db_restore_version != '' From 2809640a8c0a6bc0476d1c60969815a8d8128423 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Thu, 11 Sep 2025 10:58:35 +0200 Subject: [PATCH 7/9] sets correct galaxy release version --- host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml index 10868ad..048eca3 100644 --- a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml +++ b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml @@ -1,5 +1,5 @@ -galaxy_commit_id: release_24.2 -# galaxy_build_client: false +galaxy_commit_id: release_25.0 +galaxy_build_client: false csnt_brand: QA1-TEST csnt_log_level: DEBUG From 8b9622ed18edd1e6de53444da69a6b5c0f209117 Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Thu, 11 Sep 2025 11:58:20 +0200 Subject: [PATCH 8/9] fixes spelling typo in the commentary --- galaxy_db_recovery.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml index 76ee2fc..7659359 100644 --- a/galaxy_db_recovery.yaml +++ b/galaxy_db_recovery.yaml @@ -30,7 +30,7 @@ become_user: postgres post_tasks: # recover from WAL based on https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration - ## Part responsible for recovering Galaxy DB from backup if exists (it should be in post_tasks of dbservers but RDB (or NDB) access needs to be set up first) + ## Part responsible for recovering Galaxy DB from backup if exists (it should be in post_tasks of dbservers but RBD (or NBD) access needs to be set up first) - name: Register if galaxy exists ansible.builtin.shell: cmd: galaxyctl status From 1ed43f12f2af2a45fe822736b6b4bdc97205810a Mon Sep 17 00:00:00 2001 From: Martin Demko <325073@mail.muni.cz> Date: Thu, 11 Sep 2025 11:59:31 +0200 Subject: [PATCH 9/9] moves conditional clause from tasks of the role to limit the role itself --- galaxy.yml | 1 + .../tasks/main.yml | 35 +++++++------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/galaxy.yml b/galaxy.yml index 5abff1a..3d30313 100644 --- a/galaxy.yml +++ b/galaxy.yml @@ -100,6 +100,7 @@ become: true become_user: postgres - role: metacentrum.postgresql_restore + when: galaxy_db_restore_version != '' - hosts: noletsencrypt become: true diff --git a/roles/metacentrum.postgresql_restore/tasks/main.yml b/roles/metacentrum.postgresql_restore/tasks/main.yml index e4c20d0..445aa77 100644 --- a/roles/metacentrum.postgresql_restore/tasks/main.yml +++ b/roles/metacentrum.postgresql_restore/tasks/main.yml @@ -2,7 +2,6 @@ package: name: ['rsync'] become: true - when: galaxy_db_restore_version != '' # recover from WAL based on https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration - name: Register if galaxy exists @@ -11,7 +10,6 @@ register: galaxyctl_status ignore_errors: true become: true - when: galaxy_db_restore_version != '' - name: Register psql data directory ansible.builtin.shell: @@ -19,7 +17,6 @@ register: psql_data_dir become: true become_user: postgres - when: galaxy_db_restore_version != '' ## Unsucessfull attempt to make a prompt with the list of all available backup versions # - name: Gather Galaxy DB backup versions @@ -46,12 +43,11 @@ - name: Print PostgreSQL data directory path debug: msg: "PostgreSQL data directory path: {{ psql_data_dir.stdout }}" - when: galaxy_db_restore_version != '' - name: Print Warning debug: msg: "No PostgreSQL data directory path! PSQL DB restore is not possible!" - when: psql_data_dir.stdout == '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout == '' - name: Restore Galaxy DB backup - pick proper backup version ansible.builtin.shell: @@ -59,38 +55,35 @@ register: galaxy_db_restore_dir become: true become_user: postgres - when: galaxy_db_restore_version != '' - name: Restore Galaxy DB backup - print selected version debug: msg: "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}" - when: galaxy_db_restore_version != '' - name: Restore Galaxy DB backup - stop Galaxy ansible.builtin.shell: cmd: galaxyctl stop become: true - when: galaxyctl_status.rc == 0 and galaxy_db_restore_version != '' + when: galaxyctl_status.rc == 0 - name: Restore Galaxy DB backup - stop postgresql ansible.builtin.systemd: name: postgresql state: stopped become: true - when: galaxy_db_restore_version != '' - name: Restore Galaxy DB backup - backup current postgresql ansible.builtin.shell: cmd: "mv {{ psql_data_dir.stdout }} {{ psql_data_dir.stdout+'.backup_'+ansible_date_time.iso8601 }}" ignore_errors: true - when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' become: true become_user: postgres - name: Restore Galaxy DB backup - copy proper backup version ansible.builtin.shell: cmd: "rsync -a {{ galaxy_db_restore_dir.stdout }}/ {{ psql_data_dir.stdout }}/ && chmod -R 0700 {{ psql_data_dir.stdout }} " - when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + when: galaxy_db_restore_dir.stdout != '' and psql_data_dir.stdout != '' become: true become_user: postgres @@ -101,7 +94,7 @@ line: "restore_command = 'cp \"{{ postgresql_backup_dir }}/wal_archive/%f\" \"%p\"'" state: present create: true - when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' become: true become_user: postgres @@ -112,7 +105,7 @@ line: "recovery_target_time = '{{ galaxy_db_restore_timestamp }}'" state: present create: true - when: psql_data_dir.stdout != '' and galaxy_db_restore_timestamp != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and galaxy_db_restore_timestamp != '' become: true become_user: postgres @@ -123,7 +116,7 @@ line: "recovery_target_timeline = '{{ galaxy_db_restore_timeline }}'" state: present create: true - when: psql_data_dir.stdout != '' and galaxy_db_restore_timeline != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and galaxy_db_restore_timeline != '' become: true become_user: postgres @@ -134,14 +127,14 @@ line: "recovery_target_action = '{{ galaxy_db_restore_action }}'" state: present create: true - when: psql_data_dir.stdout != '' and galaxy_db_restore_action != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != '' become: true become_user: postgres - name: Restore Galaxy DB backup - touch the signal file ansible.builtin.shell: cmd: "touch {{ psql_data_dir.stdout }}/recovery.signal " - when: psql_data_dir.stdout != '' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' become: true become_user: postgres @@ -150,7 +143,6 @@ name: postgresql state: started become: true - when: galaxy_db_restore_version != '' - name: Restore Galaxy DB backup - register if postgresql log exists ansible.builtin.shell: @@ -158,28 +150,27 @@ register: psql_log ignore_errors: true become: true - when: galaxy_db_restore_version != '' - name: Restore Galaxy DB backup - show the PSQL log tail ansible.builtin.shell: cmd: "tail -20 {{ psql_log.stdout }}" - when: psql_data_dir.stdout != '' and psql_log.rc == 0 and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and psql_log.rc == 0 become: true - name: Restore Galaxy DB backup - wait to read the PSQL log ansible.builtin.pause: seconds: 10 - when: psql_data_dir.stdout != '' and galaxy_db_restore_action == 'promote' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and galaxy_db_restore_action == 'promote' become: true - name: Restore Galaxy DB backup - wait for manual check of PSQL state ansible.builtin.pause: prompt: Please, manually check the DB state. To unpause PSQL DB use command 'psql -c "select * from pg_wal_replay_resume();"' as postgres user on the server. - when: psql_data_dir.stdout != '' and galaxy_db_restore_action != 'promote' and galaxy_db_restore_version != '' + when: psql_data_dir.stdout != '' and galaxy_db_restore_action != 'promote' become: true - name: Restore Galaxy DB backup - start Galaxy ansible.builtin.shell: cmd: galaxyctl start become: true - when: galaxyctl_status.rc == 0 and galaxy_db_restore_version != '' + when: galaxyctl_status.rc == 0