diff --git a/src/archive.c b/src/archive.c index 4058cd0d4..44ddc08c0 100644 --- a/src/archive.c +++ b/src/archive.c @@ -148,7 +148,7 @@ do_archive_push(InstanceState *instanceState, InstanceConfig *instance, char *wa elog(ERROR, "getcwd() error"); /* verify that archive-push --instance parameter is valid */ - system_id = get_system_identifier(current_dir); + system_id = get_system_identifier(current_dir, XLOG_CONTROL_FILE, FIO_DB_HOST); if (instance->pgdata == NULL) elog(ERROR, "Cannot read pg_probackup.conf for this instance"); diff --git a/src/backup.c b/src/backup.c index 46e4f1ea7..cb190cd20 100644 --- a/src/backup.c +++ b/src/backup.c @@ -983,7 +983,7 @@ check_system_identifiers(PGconn *conn, char *pgdata) uint64 system_id_conn; uint64 system_id_pgdata; - system_id_pgdata = get_system_identifier(pgdata); + system_id_pgdata = get_system_identifier(pgdata, XLOG_CONTROL_FILE, FIO_DB_HOST); system_id_conn = get_remote_system_identifier(conn); /* for checkdb check only system_id_pgdata and system_id_conn */ diff --git a/src/init.c b/src/init.c index dc821325a..622310bc2 100644 --- a/src/init.c +++ b/src/init.c @@ -57,7 +57,7 @@ do_add_instance(InstanceState *instanceState, InstanceConfig *instance) "(-D, --pgdata)"); /* Read system_identifier from PGDATA */ - instance->system_identifier = get_system_identifier(instance->pgdata); + instance->system_identifier = get_system_identifier(instance->pgdata, XLOG_CONTROL_FILE, FIO_DB_HOST); /* Starting from PostgreSQL 11 read WAL segment size from PGDATA */ instance->xlog_seg_size = get_xlog_seg_size(instance->pgdata); diff --git a/src/pg_probackup.h b/src/pg_probackup.h index f5fd0f672..08757b67e 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -84,6 +84,7 @@ extern const char *PROGRAM_EMAIL; #define DATABASE_MAP "database_map" #define HEADER_MAP "page_header_map" #define HEADER_MAP_TMP "page_header_map_tmp" +#define XLOG_CONTROL_BAK_FILE XLOG_CONTROL_FILE".pbk.bak" /* Timeout defaults */ #define 
ARCHIVE_TIMEOUT_DEFAULT 300 @@ -1138,12 +1139,12 @@ extern XLogRecPtr get_next_record_lsn(const char *archivedir, XLogSegNo segno, T extern TimeLineID get_current_timeline(PGconn *conn); extern TimeLineID get_current_timeline_from_control(bool safe); extern XLogRecPtr get_checkpoint_location(PGconn *conn); -extern uint64 get_system_identifier(const char *pgdata_path); +extern uint64 get_system_identifier(const char *pgdata_path, const char *pg_control_filename, fio_location location); extern uint64 get_remote_system_identifier(PGconn *conn); extern uint32 get_data_checksum_version(bool safe); extern pg_crc32c get_pgcontrol_checksum(const char *pgdata_path); extern uint32 get_xlog_seg_size(char *pgdata_path); -extern void get_redo(const char *pgdata_path, RedoParams *redo); +extern void get_redo(const char *pgdata_path, const char *pg_control_filename, RedoParams *redo); extern void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn); extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, diff --git a/src/restore.c b/src/restore.c index 7f5df1a00..067abd349 100644 --- a/src/restore.c +++ b/src/restore.c @@ -3,7 +3,7 @@ * restore.c: restore DB cluster and archived WAL. 
* * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * *------------------------------------------------------------------------- */ @@ -61,6 +61,8 @@ static void create_recovery_conf(InstanceState *instanceState, time_t backup_id, pgBackup *backup, pgRestoreParams *params); static void *restore_files(void *arg); +static size_t restore_file(pgFile *dest_file, const char *to_fullpath, bool already_exists, char *out_buf, + pgBackup *dest_backup, parray *parent_chain, bool use_bitmap, IncrRestoreMode incremental_mode, XLogRecPtr shift_lsn); static void set_orphan_status(parray *backups, pgBackup *parent_backup); static void restore_chain(pgBackup *dest_backup, parray *parent_chain, @@ -487,7 +489,20 @@ do_restore_or_validate(InstanceState *instanceState, time_t target_backup_id, pg { RedoParams redo; parray *timelines = NULL; - get_redo(instance_config.pgdata, &redo); + + /* [Issue #313] check for previous failed incremental restore */ + { + char filename[MAXPGPATH]; + + join_path_components(filename, instance_config.pgdata, XLOG_CONTROL_BAK_FILE); + if (fio_access(filename, F_OK, FIO_DB_HOST) == 0) + { + elog(WARNING, "\"%s\" found, using incremental restore parameters from it", filename); + get_redo(instance_config.pgdata, XLOG_CONTROL_BAK_FILE, &redo); + } + else + get_redo(instance_config.pgdata, XLOG_CONTROL_FILE, &redo); + } if (redo.checksum_version == 0) elog(ERROR, "Incremental restore in 'lsn' mode require " @@ -710,6 +725,9 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, parray *pgdata_files = NULL; parray *dest_files = NULL; parray *external_dirs = NULL; + pgFile *dest_pg_control_file = NULL; + char dest_pg_control_fullpath[MAXPGPATH]; + char dest_pg_control_bak_fullpath[MAXPGPATH]; /* arrays with meta info for multi threaded backup */ pthread_t *threads; restore_files_arg *threads_args; @@ 
-790,12 +808,7 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, * unless we are running incremental-lsn restore, then bitmap is mandatory. */ if (use_bitmap && parray_num(parent_chain) == 1) - { - if (params->incremental_mode == INCR_NONE) - use_bitmap = false; - else - use_bitmap = true; - } + use_bitmap = params->incremental_mode != INCR_NONE; /* * Restore dest_backup internal directories. @@ -913,6 +926,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0) redundant = true; + /* global/pg_control.pbk.bak is always kept, because it's needed to restart a failed incremental restore */ + if (file->external_dir_num == 0 && + pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0) + redundant = false; + /* do not delete the useful internal directories */ if (S_ISDIR(file->mode) && !redundant) continue; @@ -965,6 +983,37 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, dest_bytes = dest_backup->pgdata_bytes; pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes)); + + /* + * [Issue #313] + * find pg_control file (in already sorted earlier dest_files, see parray_qsort(backup->files...)) + * and exclude it from list for future special processing + */ + { + int control_file_elem_index; + pgFile search_key; + MemSet(&search_key, 0, sizeof(pgFile)); + /* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */ + search_key.rel_path = XLOG_CONTROL_FILE; + search_key.external_dir_num = 0; + control_file_elem_index = parray_bsearch_index(dest_files, &search_key, pgFileCompareRelPathWithExternal); + + if (control_file_elem_index < 0) + elog(ERROR, "File \"%s\" not found in backup %s", XLOG_CONTROL_FILE, base36enc(dest_backup->start_time)); + dest_pg_control_file = parray_remove(dest_files, control_file_elem_index); + + join_path_components(dest_pg_control_fullpath, pgdata_path, XLOG_CONTROL_FILE); +
join_path_components(dest_pg_control_bak_fullpath, pgdata_path, XLOG_CONTROL_BAK_FILE); + /* + * rename (if it exist) dest control file before restoring + * if it doesn't exist, that mean, that we already restoring in a previously failed + * pgdata, where XLOG_CONTROL_BAK_FILE exist + */ + if (params->incremental_mode != INCR_NONE + && fio_access(dest_pg_control_fullpath, F_OK, FIO_DB_HOST) == 0) + fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_DB_HOST); + } + elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes); time(&start_time); thread_interrupted = false; @@ -972,30 +1021,30 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, /* Restore files into target directory */ for (i = 0; i < num_threads; i++) { - restore_files_arg *arg = &(threads_args[i]); - - arg->dest_files = dest_files; - arg->pgdata_files = pgdata_files; - arg->dest_backup = dest_backup; - arg->dest_external_dirs = external_dirs; - arg->parent_chain = parent_chain; - arg->dbOid_exclude_list = dbOid_exclude_list; - arg->skip_external_dirs = params->skip_external_dirs; - arg->to_root = pgdata_path; - arg->use_bitmap = use_bitmap; - arg->incremental_mode = params->incremental_mode; - arg->shift_lsn = params->shift_lsn; - threads_args[i].restored_bytes = 0; - /* By default there are some error */ - threads_args[i].ret = 1; + threads_args[i] = (restore_files_arg){ + .dest_files = dest_files, + .pgdata_files = pgdata_files, + .dest_backup = dest_backup, + .dest_external_dirs = external_dirs, + .parent_chain = parent_chain, + .dbOid_exclude_list = dbOid_exclude_list, + .skip_external_dirs = params->skip_external_dirs, + .to_root = pgdata_path, + .use_bitmap = use_bitmap, + .incremental_mode = params->incremental_mode, + .shift_lsn = params->shift_lsn, + .restored_bytes = 0, + /* By default there are some error */ + .ret = 1, + }; /* Useless message TODO: rewrite */ elog(LOG, "Start thread %i", i + 1); - pthread_create(&threads[i], NULL, 
restore_files, arg); + pthread_create(&threads[i], NULL, restore_files, &(threads_args[i])); } - /* Wait theads */ + /* Wait threads */ for (i = 0; i < num_threads; i++) { pthread_join(threads[i], NULL); @@ -1005,6 +1054,15 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, total_bytes += threads_args[i].restored_bytes; } + /* [Issue #313] copy pg_control at very end */ + if (restore_isok) + { + total_bytes += restore_file(dest_pg_control_file, dest_pg_control_fullpath, false, NULL, + dest_backup, parent_chain, use_bitmap, params->incremental_mode, params->shift_lsn); + if (params->incremental_mode != INCR_NONE) + fio_unlink(dest_pg_control_bak_fullpath, FIO_DB_HOST); + } + time(&end_time); pretty_time_interval(difftime(end_time, start_time), pretty_time, lengthof(pretty_time)); @@ -1070,6 +1128,10 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, elog(ERROR, "Failed to sync file \"%s\": %s", to_fullpath, strerror(errno)); } + /* sync control file */ + if (fio_sync(dest_pg_control_fullpath, FIO_DB_HOST) != 0) + elog(ERROR, "Failed to sync file \"%s\": %s", dest_pg_control_fullpath, strerror(errno)); + time(&end_time); pretty_time_interval(difftime(end_time, start_time), pretty_time, lengthof(pretty_time)); @@ -1089,6 +1151,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, parray_free(pgdata_files); } + pgFileFree(dest_pg_control_file); + for (i = parray_num(parent_chain) - 1; i >= 0; i--) { pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); @@ -1107,7 +1171,6 @@ restore_files(void *arg) int i; uint64 n_files; char to_fullpath[MAXPGPATH]; - FILE *out = NULL; char *out_buf = pgut_malloc(STDIO_BUFSIZE); restore_files_arg *arguments = (restore_files_arg *) arg; @@ -1117,9 +1180,6 @@ restore_files(void *arg) for (i = 0; i < parray_num(arguments->dest_files); i++) { bool already_exists = false; - PageState *checksum_map = NULL; /* it should take ~1.5MB at most */ - datapagemap_t *lsn_map = NULL; /* it should take 16kB at 
most */ - char *errmsg = NULL; /* remote agent error message */ pgFile *dest_file = (pgFile *) parray_get(arguments->dest_files, i); /* Directories were created before */ @@ -1193,73 +1253,101 @@ restore_files(void *arg) already_exists = true; } - /* - * Handle incremental restore case for data files. - * If file is already exists in pgdata, then - * we scan it block by block and get - * array of checksums for every page. - */ - if (already_exists && - dest_file->is_datafile && !dest_file->is_cfs && - dest_file->n_blocks > 0) + arguments->restored_bytes += restore_file(dest_file, to_fullpath, already_exists, out_buf, + arguments->dest_backup, arguments->parent_chain, arguments->use_bitmap, + arguments->incremental_mode, arguments->shift_lsn); + } + + free(out_buf); + + /* ssh connection no longer needed */ + fio_disconnect(); + + /* Data files restoring is successful */ + arguments->ret = 0; + + return NULL; +} + +/* + * Restore one file into $PGDATA. + */ +static size_t +restore_file(pgFile *dest_file, const char *to_fullpath, bool already_exists, char *out_buf, + pgBackup *dest_backup, parray *parent_chain, bool use_bitmap, IncrRestoreMode incremental_mode, XLogRecPtr shift_lsn) +{ + FILE *out = NULL; + size_t restored_bytes = 0; + PageState *checksum_map = NULL; /* it should take ~1.5MB at most */ + datapagemap_t *lsn_map = NULL; /* it should take 16kB at most */ + char *errmsg = NULL; /* remote agent error message */ + + /* + * Handle incremental restore case for data files. + * If the file already exists in pgdata, then + * we scan it block by block and get + * array of checksums for every page.
+ */ + if (already_exists && + dest_file->is_datafile && !dest_file->is_cfs && + dest_file->n_blocks > 0) + { + if (incremental_mode == INCR_LSN) { - if (arguments->incremental_mode == INCR_LSN) - { - lsn_map = fio_get_lsn_map(to_fullpath, arguments->dest_backup->checksum_version, - dest_file->n_blocks, arguments->shift_lsn, - dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); - } - else if (arguments->incremental_mode == INCR_CHECKSUM) - { - checksum_map = fio_get_checksum_map(to_fullpath, arguments->dest_backup->checksum_version, - dest_file->n_blocks, arguments->dest_backup->stop_lsn, - dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); - } + lsn_map = fio_get_lsn_map(to_fullpath, dest_backup->checksum_version, + dest_file->n_blocks, shift_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); } + else if (incremental_mode == INCR_CHECKSUM) + { + checksum_map = fio_get_checksum_map(to_fullpath, dest_backup->checksum_version, + dest_file->n_blocks, dest_backup->stop_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + } + } - /* - * Open dest file and truncate it to zero, if destination - * file already exists and dest file size is zero, or - * if file do not exist - */ - if ((already_exists && dest_file->write_size == 0) || !already_exists) - out = fio_fopen(to_fullpath, PG_BINARY_W, FIO_DB_HOST); - /* - * If file already exists and dest size is not zero, - * then open it for reading and writing. - */ - else - out = fio_fopen(to_fullpath, PG_BINARY_R "+", FIO_DB_HOST); - - if (out == NULL) - elog(ERROR, "Cannot open restore target file \"%s\": %s", - to_fullpath, strerror(errno)); + /* + * Open dest file and truncate it to zero, if destination + * file already exists and dest file size is zero, or + * if file do not exist + */ + if ((already_exists && dest_file->write_size == 0) || !already_exists) + out = fio_fopen(to_fullpath, PG_BINARY_W, FIO_DB_HOST); + /* + * If file already exists and dest size is not zero, + * then open it for reading and writing. 
+ */ + else + out = fio_fopen(to_fullpath, PG_BINARY_R "+", FIO_DB_HOST); - /* update file permission */ - if (fio_chmod(to_fullpath, dest_file->mode, FIO_DB_HOST) == -1) - elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, - strerror(errno)); + if (out == NULL) + elog(ERROR, "Cannot open restore target file \"%s\": %s", + to_fullpath, strerror(errno)); - if (!dest_file->is_datafile || dest_file->is_cfs) - elog(VERBOSE, "Restoring nonedata file: \"%s\"", to_fullpath); - else - elog(VERBOSE, "Restoring data file: \"%s\"", to_fullpath); + /* update file permission */ + if (fio_chmod(to_fullpath, dest_file->mode, FIO_DB_HOST) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); - // If destination file is 0 sized, then just close it and go for the next - if (dest_file->write_size == 0) - goto done; + if (!dest_file->is_datafile || dest_file->is_cfs) + elog(VERBOSE, "Restoring nonedata file: \"%s\"", to_fullpath); + else + elog(VERBOSE, "Restoring data file: \"%s\"", to_fullpath); + // If destination file is 0 sized, then just close it and go for the next + if (dest_file->write_size != 0) + { /* Restore destination file */ if (dest_file->is_datafile && !dest_file->is_cfs) { /* enable stdio buffering for local destination data file */ - if (!fio_is_remote_file(out)) + if (!fio_is_remote_file(out) && out_buf != NULL) setvbuf(out, out_buf, _IOFBF, STDIO_BUFSIZE); /* Destination file is data file */ - arguments->restored_bytes += restore_data_file(arguments->parent_chain, + restored_bytes += restore_data_file(parent_chain, dest_file, out, to_fullpath, - arguments->use_bitmap, checksum_map, - arguments->shift_lsn, lsn_map, true); + use_bitmap, checksum_map, + shift_lsn, lsn_map, true); } else { @@ -1267,40 +1355,32 @@ restore_files(void *arg) if (!fio_is_remote_file(out)) setvbuf(out, NULL, _IONBF, BUFSIZ); /* Destination file is nonedata file */ - arguments->restored_bytes += restore_non_data_file(arguments->parent_chain, - 
arguments->dest_backup, dest_file, out, to_fullpath, + restored_bytes += restore_non_data_file(parent_chain, + dest_backup, dest_file, out, to_fullpath, already_exists); } + } -done: - /* Writing is asynchronous in case of restore in remote mode, so check the agent status */ - if (fio_check_error_file(out, &errmsg)) - elog(ERROR, "Cannot write to the remote file \"%s\": %s", to_fullpath, errmsg); - - /* close file */ - if (fio_fclose(out) != 0) - elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath, - strerror(errno)); - - /* free pagemap used for restore optimization */ - pg_free(dest_file->pagemap.bitmap); + /* Writing is asynchronous in case of restore in remote mode, so check the agent status */ + if (fio_check_error_file(out, &errmsg)) + elog(ERROR, "Cannot write to the remote file \"%s\": %s", to_fullpath, errmsg); - if (lsn_map) - pg_free(lsn_map->bitmap); + /* close file */ + if (fio_fclose(out) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath, + strerror(errno)); - pg_free(lsn_map); - pg_free(checksum_map); - } + if (lsn_map) + pg_free(lsn_map->bitmap); - free(out_buf); + pg_free(lsn_map); + pg_free(checksum_map); - /* ssh connection to longer needed */ - fio_disconnect(); + /* free pagemap used for restore optimization */ + pg_free(dest_file->pagemap.bitmap); + dest_file->pagemap.bitmap = NULL; - /* Data files restoring is successful */ - arguments->ret = 0; - - return NULL; + return restored_bytes; } /* @@ -2153,7 +2233,7 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier, bool postmaster_is_up = false; bool backup_label_exists = false; pid_t pid; - char backup_label[MAXPGPATH]; + char filename[MAXPGPATH]; /* check postmaster pid */ pid = fio_check_postmaster(pgdata, FIO_DB_HOST); @@ -2188,7 +2268,12 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier, */ elog(INFO, "Trying to read pg_control file in destination direstory"); - system_id_pgdata = get_system_identifier(pgdata); + /* 
[Issue #313] check for previous failed incremental restore */ + join_path_components(filename, pgdata, XLOG_CONTROL_BAK_FILE); + if (fio_access(filename, F_OK, FIO_DB_HOST) == 0) + system_id_pgdata = get_system_identifier(pgdata, XLOG_CONTROL_BAK_FILE, FIO_DB_HOST); + else + system_id_pgdata = get_system_identifier(pgdata, XLOG_CONTROL_FILE, FIO_DB_HOST); if (system_id_pgdata == instance_config.system_identifier) system_id_match = true; @@ -2203,8 +2288,8 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier, */ if (incremental_mode == INCR_LSN) { - join_path_components(backup_label, pgdata, "backup_label"); - if (fio_access(backup_label, F_OK, FIO_DB_HOST) == 0) + join_path_components(filename, pgdata, PG_BACKUP_LABEL_FILE); + if (fio_access(filename, F_OK, FIO_DB_HOST) == 0) { elog(WARNING, "Destination directory contains \"backup_control\" file. " "This does NOT mean that you should delete this file and retry, only that " diff --git a/src/util.c b/src/util.c index c0a1dc9e4..cdb64948d 100644 --- a/src/util.c +++ b/src/util.c @@ -249,14 +249,14 @@ get_checkpoint_location(PGconn *conn) } uint64 -get_system_identifier(const char *pgdata_path) +get_system_identifier(const char *pgdata_path, const char *pg_control_filename, fio_location location) { ControlFileData ControlFile; char *buffer; size_t size; /* First fetch file...
*/ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + buffer = slurpFile(pgdata_path, pg_control_filename, &size, false, location); if (buffer == NULL) return 0; digestControlFile(&ControlFile, buffer, size); @@ -351,15 +351,18 @@ get_pgcontrol_checksum(const char *pgdata_path) return ControlFile.crc; } +/* + * you can use XLOG_CONTROL_FILE or XLOG_CONTROL_BAK_FILE for value of control_file_name parameter + */ void -get_redo(const char *pgdata_path, RedoParams *redo) +get_redo(const char *pgdata_path, const char *pg_control_filename, RedoParams *redo) { ControlFileData ControlFile; char *buffer; size_t size; /* First fetch file... */ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + buffer = slurpFile(pgdata_path, pg_control_filename, &size, false, FIO_DB_HOST); digestControlFile(&ControlFile, buffer, size); pg_free(buffer); diff --git a/src/utils/parray.c b/src/utils/parray.c index 95b83365d..792e26907 100644 --- a/src/utils/parray.c +++ b/src/utils/parray.c @@ -198,6 +198,13 @@ parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const return bsearch(&key, array->data, array->used, sizeof(void *), compare); } +int +parray_bsearch_index(parray *array, const void *key, int(*compare)(const void *, const void *)) +{ + void **elem = parray_bsearch(array, key, compare); + return elem != NULL ? 
elem - array->data : -1; +} + /* checks that parray contains element */ bool parray_contains(parray *array, void *elem) { diff --git a/src/utils/parray.h b/src/utils/parray.h index 85d7383f3..e92ad728c 100644 --- a/src/utils/parray.h +++ b/src/utils/parray.h @@ -29,6 +29,7 @@ extern bool parray_rm(parray *array, const void *key, int(*compare)(const void * extern size_t parray_num(const parray *array); extern void parray_qsort(parray *array, int(*compare)(const void *, const void *)); extern void *parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const void *)); +extern int parray_bsearch_index(parray *array, const void *key, int(*compare)(const void *, const void *)); extern void parray_walk(parray *array, void (*action)(void *)); extern bool parray_contains(parray *array, void *elem); diff --git a/tests/helpers/ptrack_helpers.py b/tests/helpers/ptrack_helpers.py index 0a87eb1b6..e3b0aa6c8 100644 --- a/tests/helpers/ptrack_helpers.py +++ b/tests/helpers/ptrack_helpers.py @@ -1602,11 +1602,11 @@ def pgdata_content(self, pgdata, ignore_ptrack=True, exclude_dirs=None): files_to_ignore = [ 'postmaster.pid', 'postmaster.opts', 'pg_internal.init', 'postgresql.auto.conf', - 'backup_label', 'tablespace_map', 'recovery.conf', - 'ptrack_control', 'ptrack_init', 'pg_control', - 'probackup_recovery.conf', 'recovery.signal', - 'standby.signal', 'ptrack.map', 'ptrack.map.mmap', - 'ptrack.map.tmp' + 'backup_label', 'backup_label.old', 'tablespace_map', + 'recovery.conf', 'recovery.done', 'ptrack_control', + 'ptrack_init', 'pg_control', 'probackup_recovery.conf', + 'recovery.signal', 'standby.signal', + 'ptrack.map', 'ptrack.map.mmap', 'ptrack.map.tmp' ] if exclude_dirs: diff --git a/tests/incr_restore.py b/tests/incr_restore.py index cb684a23a..d7dec183c 100644 --- a/tests/incr_restore.py +++ b/tests/incr_restore.py @@ -9,7 +9,7 @@ import hashlib import shutil import json -from testgres import QueryException +from testgres import QueryException, 
StartNodeException module_name = 'incr_restore' @@ -2436,3 +2436,82 @@ def test_incremental_pg_filenode_map(self): self.del_test_dir(module_name, fname) # check that MinRecPoint and BackupStartLsn are correctly used in case of --incrementa-lsn + + + # @unittest.skip("skip") + def test_incr_restore_issue_313(self): + """ + Check that failed incremental restore can be restarted + """ + fname = self.id().split('.')[3] + + node = self.make_simple_node( + base_dir = os.path.join(module_name, fname, 'node'), + set_replication = True, + initdb_params = ['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale = 50) + + full_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='full') + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + last_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + + self.restore_node(backup_dir, 'node', node, backup_id=full_backup_id) + + count = 0 + filelist = self.get_backup_filelist(backup_dir, 'node', last_backup_id) + for file in filelist: + # count only nondata files + if int(filelist[file]['is_datafile']) == 0 and int(filelist[file]['size']) > 0: + count += 1 + + gdb = self.restore_node(backup_dir, 'node', node, gdb=True, + backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum']) + gdb.verbose = False + gdb.set_breakpoint('restore_non_data_file') + gdb.run_until_break() + gdb.continue_execution_until_break(count - 2) + gdb.quit() + + try: + node.slow_start() + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because backup 
is not fully restored") + except StartNodeException as e: + self.assertIn( + 'Cannot start node', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f: + self.assertIn( + "postgres: could not find the database system", + f.read()) + + self.restore_node(backup_dir, 'node', node, + backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum']) + node.slow_start() + + self.compare_pgdata(pgdata, self.pgdata_content(node.data_dir)) + + # Clean after yourself + node.stop() + self.del_test_dir(module_name, fname) diff --git a/tests/restore.py b/tests/restore.py index 8ccffa44c..363aae1ec 100644 --- a/tests/restore.py +++ b/tests/restore.py @@ -3880,7 +3880,7 @@ def test_restore_issue_313(self): node.cleanup() count = 0 - filelist = self.get_backup_filelist(backup_dir, 'node', backup_id) + filelist = self.get_backup_filelist(backup_dir, 'node', backup_id) for file in filelist: # count only nondata files if int(filelist[file]['is_datafile']) == 0 and int(filelist[file]['size']) > 0: @@ -3918,5 +3918,10 @@ def test_restore_issue_313(self): '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) + with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f: + self.assertIn( + "PANIC: could not read from control file: read 0 bytes", + f.read()) + # Clean after yourself self.del_test_dir(module_name, fname)