@@ -241,6 +241,7 @@ def unload_to_files(
241241 kms_key_id : str | None = None ,
242242 manifest : bool = False ,
243243 partition_cols : list [str ] | None = None ,
244+ cleanpath : bool = False ,
244245 boto3_session : boto3 .Session | None = None ,
245246) -> None :
246247 """Unload Parquet files on s3 from a Redshift query result (Through the UNLOAD command).
@@ -294,6 +295,21 @@ def unload_to_files(
294295 Unload a manifest file on S3.
295296 partition_cols
296297 Specifies the partition keys for the unload operation.
298+ cleanpath
299+ Whether to use CLEANPATH instead of ALLOWOVERWRITE. If True, CLEANPATH removes the existing files
300+ located in the Amazon S3 path before unloading. If False (default), ALLOWOVERWRITE overwrites
301+ existing files, including the manifest file. Redshift does not accept both options in one UNLOAD.
302+
303+ ALLOWOVERWRITE: By default, UNLOAD fails if it finds files that it would possibly overwrite.
304+ If ALLOWOVERWRITE is specified, UNLOAD overwrites existing files, including the manifest file.
305+
306+ CLEANPATH: Removes existing files located in the Amazon S3 path specified in the TO clause
307+ before unloading files to the specified location. If you include the PARTITION BY clause,
308+ existing files are removed only from the partition folders to receive new files generated
309+ by the UNLOAD operation. You must have the s3:DeleteObject permission on the Amazon S3 bucket.
310+ Files removed using CLEANPATH are permanently deleted and can't be recovered.
311+
312+ For more information, see: https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html
297313 boto3_session
298314 The default boto3 session will be used if **boto3_session** is ``None``.
299315
@@ -307,6 +323,15 @@ def unload_to_files(
307323 ... con=con,
308324 ... iam_role="arn:aws:iam::XXX:role/XXX"
309325 ... )
326+ >>> # Using CLEANPATH instead of ALLOWOVERWRITE
327+ >>> with wr.redshift.connect("MY_GLUE_CONNECTION") as con:
328+ ... wr.redshift.unload_to_files(
329+ ... sql="SELECT * FROM public.mytable",
330+ ... path="s3://bucket/extracted_parquet_files/",
331+ ... con=con,
332+ ... iam_role="arn:aws:iam::XXX:role/XXX",
333+ ... cleanpath=True
334+ ... )
310335
311336
312337 """
@@ -339,11 +364,13 @@ def unload_to_files(
339364 # Escape quotation marks in SQL
340365 sql = sql .replace ("'" , "''" )
341366
367+ overwrite_str : str = "CLEANPATH" if cleanpath else "ALLOWOVERWRITE"
368+
342369 unload_sql = (
343370 f"UNLOAD ('{ sql } ')\n "
344371 f"TO '{ path } '\n "
345372 f"{ auth_str } "
346- "ALLOWOVERWRITE \n "
373+ f" { overwrite_str } \n "
347374 f"{ parallel_str } \n "
348375 f"FORMAT { format_str } \n "
349376 "ENCRYPTED"
@@ -376,6 +403,7 @@ def unload(
376403 chunked : bool | int = False ,
377404 keep_files : bool = False ,
378405 parallel : bool = True ,
406+ cleanpath : bool = False ,
379407 use_threads : bool | int = True ,
380408 boto3_session : boto3 .Session | None = None ,
381409 s3_additional_kwargs : dict [str , str ] | None = None ,
@@ -452,6 +480,21 @@ def unload(
452480 By default, UNLOAD writes data in parallel to multiple files, according to the number of
453481 slices in the cluster. If parallel is False, UNLOAD writes to one or more data files serially,
454482 sorted absolutely according to the ORDER BY clause, if one is used.
483+ cleanpath
484+ Whether to use CLEANPATH instead of ALLOWOVERWRITE. If True, CLEANPATH removes the existing files
485+ located in the Amazon S3 path before unloading. If False (default), ALLOWOVERWRITE overwrites
486+ existing files, including the manifest file. Redshift does not accept both options in one UNLOAD.
487+
488+ ALLOWOVERWRITE: By default, UNLOAD fails if it finds files that it would possibly overwrite.
489+ If ALLOWOVERWRITE is specified, UNLOAD overwrites existing files, including the manifest file.
490+
491+ CLEANPATH: Removes existing files located in the Amazon S3 path specified in the TO clause
492+ before unloading files to the specified location. If you include the PARTITION BY clause,
493+ existing files are removed only from the partition folders to receive new files generated
494+ by the UNLOAD operation. You must have the s3:DeleteObject permission on the Amazon S3 bucket.
495+ Files removed using CLEANPATH are permanently deleted and can't be recovered.
496+
497+ For more information, see: https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html
455498 dtype_backend
456499 Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
457500 nullable dtypes are used for all dtypes that have a nullable implementation when
@@ -489,6 +532,15 @@ def unload(
489532 ... con=con,
490533 ... iam_role="arn:aws:iam::XXX:role/XXX"
491534 ... )
535+ >>> # Using CLEANPATH instead of ALLOWOVERWRITE
536+ >>> with wr.redshift.connect("MY_GLUE_CONNECTION") as con:
537+ ... df = wr.redshift.unload(
538+ ... sql="SELECT * FROM public.mytable",
539+ ... path="s3://bucket/extracted_parquet_files/",
540+ ... con=con,
541+ ... iam_role="arn:aws:iam::XXX:role/XXX",
542+ ... cleanpath=True
543+ ... )
492544
493545 """
494546 path = path if path .endswith ("/" ) else f"{ path } /"
@@ -505,6 +557,7 @@ def unload(
505557 kms_key_id = kms_key_id ,
506558 manifest = False ,
507559 parallel = parallel ,
560+ cleanpath = cleanpath ,
508561 boto3_session = boto3_session ,
509562 )
510563 if chunked is False :
0 commit comments