@@ -492,7 +492,7 @@ def _checkpoint_add_directory(basename):
492492 return m [1 ], f"checkpoint{ m [3 ]} "
493493
494494
495- def post_checkpoint_callback (cfg , num_updates , training_finished , filename ):
495+ def post_checkpoint_callback (cfg , num_updates , training_finished , filename , files_to_symlink_to ):
496496 if cfg .checkpoint .cloud_upload_path is not None :
497497 if "blob.core.windows.net" in cfg .checkpoint .cloud_upload_path :
498498 azcopy_logs = filename + "_azcopy_logs"
@@ -521,6 +521,9 @@ def post_checkpoint_callback(cfg, num_updates, training_finished, filename):
521521 f"Successfully copied { filename } to { cfg .checkpoint .cloud_upload_path } "
522522 )
523523 os .remove (filename )
524+
525+ # TODO[Susan]: Add symlink logic here? Check what cloud_upload_path is being used for Uriel's jobs.
526+
524527 elif cfg .checkpoint .cloud_upload_path .startswith ("nfs:" ):
525528 path , basename = os .path .split (filename )
526529 checkpoint_dir , checkpoint_file = _checkpoint_add_directory (basename )
@@ -560,6 +563,8 @@ def post_checkpoint_callback(cfg, num_updates, training_finished, filename):
560563 )
561564 os .remove (filename )
562565
566+ # TODO[Susan]: Add symlink logic here.
567+
563568 # Start running evals on uploaded checkpoint
564569 nfs_evaluation (
565570 cfg ,
@@ -583,6 +588,16 @@ def post_checkpoint_callback(cfg, num_updates, training_finished, filename):
583588 except (FileNotFoundError , AssertionError ) as e :
584589 logger .info (f"could not upload { filename } : { e } " )
585590
591+ # TODO[Susan]: Add symlink logic here.
592+
593+ # if files_to_symlink_to is not None and len(files_to_symlink_to) > 1:
594+ # for other_checkpoint in files_to_symlink_to:
595+ # if PathManager.islink(other_checkpoint):
596+ # PathManager.rm(other_checkpoint)
597+ # assert PathManager.symlink(
598+ # filename, other_checkpoint
599+ # ), f"Failed to symlink {filename} to {other_checkpoint}"
600+
586601
587602def nfs_evaluation (
588603 cfg , num_updates , training_finished , checkpoint_dir , destination_checkpoints_dir
0 commit comments