@@ -323,7 +323,7 @@ def create_backup_site_paths(self, site: str) -> BackupSitePaths:
 
         return backup_site_paths
 
-    def delete_remote_wal_before(self, wal_segment, site, pg_version):
+    def delete_remote_wal_before(self, wal_segment, site, site_prefix, pg_version):
         self.log.info("Starting WAL deletion from: %r before: %r, pg_version: %r", site, wal_segment, pg_version)
         storage = self.site_transfers.get(site)
         valid_timeline = True
@@ -334,7 +334,7 @@ def delete_remote_wal_before(self, wal_segment, site, pg_version):
                 lsn = lsn.previous_walfile_start_lsn
                 if lsn is None:
                     break
-            wal_path = os.path.join(self._get_site_prefix(site), "xlog", lsn.walfile_name)
+            wal_path = os.path.join(site_prefix, "xlog", lsn.walfile_name)
             self.log.debug("Deleting wal_file: %r", wal_path)
             try:
                 storage.delete_key(wal_path)
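
The change above means the WAL object path is built from a caller-supplied prefix rather than derived via `self._get_site_prefix(site)`, so the same method can prune WAL stored under a retired site's prefix. A trivial standalone sketch of that path construction, with invented values:

```python
import os

# Invented values; in the diff, site_prefix comes from backup metadata.
site_prefix = "old-deployment/my-site"
walfile_name = "000000010000000000000042"

wal_path = os.path.join(site_prefix, "xlog", walfile_name)
print(wal_path)  # old-deployment/my-site/xlog/000000010000000000000042
```
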
@@ -360,8 +360,8 @@ def delete_remote_wal_before(self, wal_segment, site, pg_version):
     def _get_delta_basebackup_files(self, site, storage, metadata, basebackup_name_to_delete, backups_to_keep) -> List:
         delta_formats = (BaseBackupFormat.delta_v1, BaseBackupFormat.delta_v2)
         assert metadata["format"] in delta_formats
-        all_hexdigests = set()
-        keep_hexdigests = set()
+        all_hexdigests: Dict[str, str] = {}
+        keep_hexdigests: Dict[str, str] = {}
 
         basebackup_data_files = list()
         delta_backup_names = {
@@ -372,7 +372,7 @@ def _get_delta_basebackup_files(self, site, storage, metadata, basebackup_name_t
             delta_backup_names[basebackup_name_to_delete] = metadata
 
         for backup_name, backup_metadata in delta_backup_names.items():
-            delta_backup_key = os.path.join(self._get_site_prefix(site), "basebackup", backup_name)
+            delta_backup_key = os.path.join(backup_metadata["site-prefix"], "basebackup", backup_name)
             meta, _ = download_backup_meta_file(
                 storage=storage,
                 basebackup_path=delta_backup_key,
@@ -385,18 +385,22 @@ def _get_delta_basebackup_files(self, site, storage, metadata, basebackup_name_t
             backup_state = snapshot_result["state"]
             files = backup_state["files"]
 
-            backup_hexdigests = set(delta_file["hexdigest"] for delta_file in files if delta_file["hexdigest"])
-            all_hexdigests |= backup_hexdigests
+            backup_hexdigests = {
+                delta_file["hexdigest"]: backup_metadata["site-prefix"]
+                for delta_file in files
+                if delta_file["hexdigest"]
+            }
+            all_hexdigests.update(backup_hexdigests)
 
             if backup_name != basebackup_name_to_delete:
                 # Keep data file in case if there is still a reference from other backups
-                keep_hexdigests |= backup_hexdigests
+                keep_hexdigests.update(backup_hexdigests)
             else:
                 # Add bundles to remove
                 for chunk in meta.get("chunks", []):
                     basebackup_data_files.append(
                         os.path.join(
-                            self._get_site_prefix(site),
+                            backup_metadata["site-prefix"],
                             FileTypePrefixes[FileType.Basebackup_delta_chunk],
                             chunk["chunk_filename"],
                         )
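
The switch above from `set()` to `Dict[str, str]` lets each hexdigest carry the site prefix it was uploaded under, so unreferenced delta files can be deleted from the location they actually live in. A minimal standalone sketch of that bookkeeping, with invented digests and prefixes:

```python
from typing import Dict

# Invented data: each hexdigest remembers the prefix it was uploaded under,
# which a plain set() could not express.
all_hexdigests: Dict[str, str] = {
    "aaa111": "old-deployment/site",
    "bbb222": "new-deployment/site",
}
keep_hexdigests: Dict[str, str] = {"bbb222": "new-deployment/site"}

# Key-level set difference, then a prefix lookup to build the delete path.
for hexdigest in set(all_hexdigests).difference(keep_hexdigests):
    print(f"{all_hexdigests[hexdigest]}/basebackup_delta/{hexdigest}")
# -> old-deployment/site/basebackup_delta/aaa111
```
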
@@ -405,16 +409,21 @@ def _get_delta_basebackup_files(self, site, storage, metadata, basebackup_name_t
         # Remove unreferenced files
         extra_hexdigests = set(all_hexdigests).difference(keep_hexdigests)
         for hexdigest in extra_hexdigests:
-            basebackup_data_files.append(os.path.join(self._get_site_prefix(site), "basebackup_delta", hexdigest))
+            basebackup_data_files.append(os.path.join(all_hexdigests[hexdigest], "basebackup_delta", hexdigest))
 
         return basebackup_data_files
 
     def delete_remote_basebackup(self, site, basebackup, metadata, basebackups):
         start_time = time.monotonic()
         storage = self.site_transfers.get(site)
-        main_backup_key = os.path.join(self._get_site_prefix(site), "basebackup", basebackup)
+        main_backup_key = os.path.join(metadata["site-prefix"], "basebackup", basebackup)
         basebackup_data_files = [main_backup_key]
 
+        # When we delete a basebackup, let's also delete any WAL segments before its `start-wal-segment`.
+        pg_version_str = metadata.get("pg-version")
+        pg_version = None if pg_version_str is None else int(pg_version_str)
+        self.delete_remote_wal_before(metadata["start-wal-segment"], site, metadata["site-prefix"], pg_version)
+
         if metadata.get("format") == BaseBackupFormat.v2:
             bmeta_compressed = storage.get_contents_to_string(main_backup_key)[0]
             with rohmufile.file_reader(
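
With the hunk above, deleting a basebackup also prunes the WAL segments older than that backup's own `start-wal-segment`, under that backup's own prefix. A standalone sketch of the metadata fields the new call depends on (values invented; real entries get their `site-prefix` tagged in `refresh_backup_list_and_delete_old()`):

```python
# Invented metadata for a backup about to be deleted.
metadata = {
    "start-wal-segment": "000000010000000000000042",
    "pg-version": "150002",
    "site-prefix": "old-deployment/my-site",
}

# Same guard as the diff: very old backups may lack "pg-version".
pg_version_str = metadata.get("pg-version")
pg_version = None if pg_version_str is None else int(pg_version_str)
print(pg_version)  # 150002
```
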
@@ -427,7 +436,7 @@ def delete_remote_basebackup(self, site, basebackup, metadata, basebackups):
             for chunk in bmeta["chunks"]:
                 basebackup_data_files.append(
                     os.path.join(
-                        self._get_site_prefix(site),
+                        metadata["site-prefix"],
                         "basebackup_chunk",
                         chunk["chunk_filename"],
                     )
@@ -457,14 +466,15 @@ def get_or_create_site_storage(self, site):
         self.site_transfers[site] = storage
         return storage
 
-    def get_remote_basebackups_info(self, site):
+    def get_remote_basebackups_info(self, site, site_prefix=None):
         storage = self.get_or_create_site_storage(site=site)
         site_config = self.config["backup_sites"][site]
-        results = storage.list_path(os.path.join(site_config["prefix"], "basebackup"))
+        site_prefix = site_prefix or site_config["prefix"]
+        results = storage.list_path(os.path.join(site_prefix, "basebackup"))
         for entry in results:
             self.patch_basebackup_info(entry=entry, site_config=site_config)
 
-        preservation_requests = storage.list_path(os.path.join(site_config["prefix"], "preservation_request"))
+        preservation_requests = storage.list_path(os.path.join(site_prefix, "preservation_request"))
         backups_to_preserve = parse_preservation_requests(preservation_requests)
         for entry in results:
             patch_basebackup_metadata_with_preservation(entry, backups_to_preserve)
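
The optional `site_prefix` parameter keeps the default behaviour intact while letting callers list backups stored under another prefix, e.g. one left behind by a previous deployment of the same site. A small standalone sketch of the defaulting and path layout, with a hypothetical config:

```python
import os

site_config = {"prefix": "pghoard/my-site"}  # hypothetical site config

def basebackup_listing_path(site_prefix=None):
    # Mirrors the diff's defaulting: an explicit prefix wins, otherwise
    # fall back to the site's own configured prefix.
    site_prefix = site_prefix or site_config["prefix"]
    return os.path.join(site_prefix, "basebackup")

print(basebackup_listing_path())                          # pghoard/my-site/basebackup
print(basebackup_listing_path("old-deployment/my-site"))  # old-deployment/my-site/basebackup
```
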
@@ -517,7 +527,7 @@ def determine_backups_to_delete(self, *, basebackups, site_config):
         if max_age_days and min_backups > 0:
             while basebackups and len(basebackups) > min_backups:
                 if is_basebackup_preserved(basebackups[0], now):
-                    self.log.info("Not deleting more backups because %r still needs to preserved", basebackups[0]["name"])
+                    self.log.info("Not deleting more backups because %r still needs to be preserved", basebackups[0]["name"])
                     break
                 # For age checks we treat the age as current_time - (backup_start_time + backup_interval). So when
                 # backup interval is set to 24 hours a backup started 2.5 days ago would be considered to be 1.5 days old.
@@ -539,31 +549,62 @@ def determine_backups_to_delete(self, *, basebackups, site_config):
 
     def refresh_backup_list_and_delete_old(self, site):
         """Look up basebackups from the object store, prune any extra
-        backups and return the datetime of the latest backup."""
-        basebackups = self.get_remote_basebackups_info(site)
-        self.log.debug("Found %r basebackups", basebackups)
+        backups from the current and the extra backup sites and update
+        the state with the up-to-date backups list."""
+        current_basebackups = self.get_remote_basebackups_info(site)
+        current_site_prefix = self._get_site_prefix(site)
+        for basebackup in current_basebackups:
+            basebackup["metadata"]["site-prefix"] = current_site_prefix
+
+        # If `extra_backup_sites_prefixes` is set, let's also check those sites for backups that are due for cleanup.
+        extra_basebackups = []
+        if self.config.get("extra_backup_sites_prefixes"):
+            new_extra_backup_sites_prefixes = []
+            extra_backup_sites_prefixes = self.state["extra_backup_sites_prefixes"]
+
+            for site_prefix in extra_backup_sites_prefixes:
+                extra_site_basebackups = self.get_remote_basebackups_info(site, site_prefix=site_prefix)
+                # If we did not find any backup in this extra site, we do not include it in the next round of checks.
+                if not extra_site_basebackups:
+                    continue
+                for basebackup in extra_site_basebackups:
+                    basebackup["metadata"]["site-prefix"] = site_prefix
+                extra_basebackups.extend(extra_site_basebackups)
+                new_extra_backup_sites_prefixes.append(site_prefix)
+
+            self.state["extra_backup_sites_prefixes"] = new_extra_backup_sites_prefixes
+
+        extra_basebackups.sort(key=lambda entry: entry["metadata"]["start-time"])
+        all_basebackups = extra_basebackups + current_basebackups
+
+        self.log.debug("Found %r basebackups", all_basebackups)
 
         site_config = self.config["backup_sites"][site]
         # Never delete backups from a recovery site. This check is already elsewhere as well
         # but still check explicitly here to ensure we certainly won't delete anything unexpectedly
         if site_config["active"]:
-            basebackups_to_delete = self.determine_backups_to_delete(basebackups=basebackups, site_config=site_config)
+            basebackups_to_delete = self.determine_backups_to_delete(basebackups=all_basebackups, site_config=site_config)
 
             for basebackup_to_be_deleted in basebackups_to_delete:
                 pg_version_str = basebackup_to_be_deleted["metadata"].get("pg-version")
                 pg_version = None if pg_version_str is None else int(pg_version_str)
-                last_wal_segment_still_needed = 0
-                if basebackups:
-                    last_wal_segment_still_needed = basebackups[0]["metadata"]["start-wal-segment"]
+                oldest_wal_segment_to_keep = ""
+                if all_basebackups:
+                    oldest_wal_segment_to_keep = all_basebackups[0]["metadata"]["start-wal-segment"]
 
-                if last_wal_segment_still_needed:
+                if oldest_wal_segment_to_keep:
                     # This is breaking concurrent PITR starting from the *previous* backup.
                     # That's why once a backup is preserved, we keep that backup and all the next ones.
-                    self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version)
+                    self.delete_remote_wal_before(
+                        oldest_wal_segment_to_keep, site, basebackup_to_be_deleted["metadata"]["site-prefix"], pg_version
+                    )
                 self.delete_remote_basebackup(
-                    site, basebackup_to_be_deleted["name"], basebackup_to_be_deleted["metadata"], basebackups=basebackups
+                    site,
+                    basebackup_to_be_deleted["name"],
+                    basebackup_to_be_deleted["metadata"],
+                    basebackups=all_basebackups
                 )
-        self.state["backup_sites"][site]["basebackups"] = basebackups
+        self.state["backup_sites"][site]["basebackups"] = current_basebackups
 
     def get_normalized_backup_time(self, site_config, *, now=None):
         """Returns the closest historical backup time that current time matches to (or current time if it matches).
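
The loop added above re-checks each prefix in `extra_backup_sites_prefixes` on every round and retires a prefix from the state once it no longer holds any backups, so fully drained sites stop being polled. A condensed standalone sketch of that behaviour (prefix names and listing results are invented):

```python
# Hypothetical listing results per prefix (prefix -> backups found there).
remote = {
    "old-a/site": [{"metadata": {"start-time": "2024-01-01T00:00:00Z"}}],
    "old-b/site": [],  # already fully drained
}
state = {"extra_backup_sites_prefixes": ["old-a/site", "old-b/site"]}

extra_basebackups = []
new_prefixes = []
for site_prefix in state["extra_backup_sites_prefixes"]:
    backups = remote[site_prefix]
    # A prefix that no longer holds any backups is retired from future rounds.
    if not backups:
        continue
    for backup in backups:
        backup["metadata"]["site-prefix"] = site_prefix
    extra_basebackups.extend(backups)
    new_prefixes.append(site_prefix)

state["extra_backup_sites_prefixes"] = new_prefixes
print(state["extra_backup_sites_prefixes"])  # ['old-a/site']
```
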
@@ -589,6 +630,8 @@ def get_normalized_backup_time(self, site_config, *, now=None):
     def set_state_defaults(self, site):
         if site not in self.state["backup_sites"]:
             self.state["backup_sites"][site] = {"basebackups": []}
+        if "extra_backup_sites_prefixes" not in self.state:
+            self.state["extra_backup_sites_prefixes"] = self.config.get("extra_backup_sites_prefixes", [])
 
     def startup_walk_for_missed_files(self):
         """Check xlog and xlog_incoming directories for files that receivexlog has received but not yet
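
For reference, a standalone sketch of how the new state default is seeded; the key name matches the diff, the config values are invented:

```python
# Hypothetical pghoard configuration fragment.
config = {
    "backup_sites": {"my-site": {"prefix": "new-deployment/my-site"}},
    "extra_backup_sites_prefixes": ["old-deployment/my-site"],
}
state = {"backup_sites": {}}

# Same seeding as set_state_defaults() above: copied from config once,
# then maintained by refresh_backup_list_and_delete_old().
if "extra_backup_sites_prefixes" not in state:
    state["extra_backup_sites_prefixes"] = config.get("extra_backup_sites_prefixes", [])
print(state["extra_backup_sites_prefixes"])  # ['old-deployment/my-site']
```
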