@@ -54,6 +54,7 @@ def read_csv(
     Try to mimic pandas.read_csv() as much as possible
     https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
     P.S. max_result_size != None tries to mimic the chunksize behaviour of pandas.read_sql()
+
     :param path: AWS S3 path (E.g. S3://BUCKET_NAME/KEY_NAME)
     :param max_result_size: Max number of bytes on each request to S3
     :param header: Same as pandas.read_csv()
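A minimal usage sketch (assuming the library's Session-style entry point, session.pandas.read_csv; bucket and key names are illustrative):

import awswrangler

session = awswrangler.Session()

# Whole object fetched and parsed at once
df = session.pandas.read_csv(path="s3://BUCKET_NAME/KEY_NAME")

# With max_result_size an iterator of Dataframes is returned,
# mimicking the chunksize behaviour of pandas.read_sql()
for chunk in session.pandas.read_csv(path="s3://BUCKET_NAME/KEY_NAME",
                                     max_result_size=1_048_576):  # ~1 MiB per request
    print(len(chunk.index))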
@@ -131,6 +132,7 @@ def _read_csv_iterator(
     Read CSV file from AWS S3 using optimized strategies.
     Try to mimic pandas.read_csv() as much as possible
     https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
+
     :param client_s3: Boto3 S3 client object
     :param bucket_name: S3 bucket name
     :param key_path: S3 key path (W/o bucket)
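The iterator strategy behind these parameters boils down to ranged GETs against S3. A deliberately naive, hypothetical sketch of that loop (the real helper additionally cuts each range at a safe line terminator, which is what the parsing helpers further down handle):

import io

import boto3
import pandas as pd

def read_csv_chunks(bucket_name, key_path, chunk_bytes=1_048_576):
    """Hypothetical sketch: stream an S3 CSV in byte ranges and parse each one.
    Naive on purpose: assumes every range happens to end on a line terminator."""
    client_s3 = boto3.client("s3")
    size = client_s3.head_object(Bucket=bucket_name, Key=key_path)["ContentLength"]
    start = 0
    while start < size:
        end = min(start + chunk_bytes, size) - 1
        body = client_s3.get_object(Bucket=bucket_name, Key=key_path,
                                    Range=f"bytes={start}-{end}")["Body"].read()
        yield pd.read_csv(io.BytesIO(body), header=None)
        start = end + 1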
@@ -235,6 +237,7 @@ def _extract_terminator_profile(body, sep, quotechar, lineterminator,
                                  last_index):
     """
     Backward parser for quoted CSV lines
+
     :param body: String
     :param sep: Same as pandas.read_csv()
     :param quotechar: Same as pandas.read_csv()
@@ -290,6 +293,7 @@ def _extract_terminator_profile(body, sep, quotechar, lineterminator,
 def _find_terminator(body, sep, quoting, quotechar, lineterminator):
     """
     Search backward (from end to start) for any suspect line terminator
+
     :param body: String
     :param sep: Same as pandas.read_csv()
     :param quoting: Same as pandas.read_csv()
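The core idea can be shown in a self-contained, hypothetical sketch: scanning from the end, a terminator is only safe when the number of quote characters after it is even, i.e. no quoted field is left open:

def find_safe_terminator(body, quotechar='"', lineterminator="\n"):
    """Hypothetical sketch: index of the last line terminator outside any
    quoted field, or -1 if none exists."""
    quotes_seen = 0
    for index in range(len(body) - 1, -1, -1):
        char = body[index]
        if char == quotechar:
            quotes_seen += 1
        elif char == lineterminator and quotes_seen % 2 == 0:
            return index
    return -1

assert find_safe_terminator('a,"x\ny"\nb') == 7   # the \n outside the quotes
assert find_safe_terminator('a,"x\ny"') == -1     # the only \n is quoted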
@@ -345,6 +349,7 @@ def _read_csv_once(
     Read CSV file from AWS S3 using optimized strategies.
     Try to mimic pandas.read_csv() as much as possible
     https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
+
     :param client_s3: Boto3 S3 client object
     :param bucket_name: S3 bucket name
     :param key_path: S3 key path (W/o bucket)
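For contrast with the iterator path, the single-shot strategy suggested by these parameters amounts to one GetObject call parsed in memory (hypothetical simplification):

import io

import boto3
import pandas as pd

def read_csv_once(bucket_name, key_path, **pandas_kwargs):
    """Hypothetical sketch: fetch the whole object and hand it to pandas."""
    client_s3 = boto3.client("s3")
    body = client_s3.get_object(Bucket=bucket_name, Key=key_path)["Body"].read()
    return pd.read_csv(io.BytesIO(body), **pandas_kwargs)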
@@ -391,6 +396,7 @@ def read_sql_athena(self,
391396 """
392397 Executes any SQL query on AWS Athena and return a Dataframe of the result.
393398 P.S. If max_result_size is passed, then a iterator of Dataframes is returned.
399+
394400 :param sql: SQL Query
395401 :param database: Glue/Athena Database
396402 :param s3_output: AWS S3 path
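A sketch of a call, reusing the session from the first sketch (query, database, and output path are illustrative):

df = session.pandas.read_sql_athena(sql="SELECT * FROM my_table LIMIT 10",
                                    database="my_glue_database",
                                    s3_output="s3://BUCKET_NAME/athena-results/")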
@@ -436,6 +442,7 @@ def to_csv(
436442 """
437443 Write a Pandas Dataframe as CSV files on S3
438444 Optionally writes metadata on AWS Glue.
445+
439446 :param dataframe: Pandas Dataframe
440447 :param path: AWS S3 path (E.g. s3://bucket-name/folder_name/
441448 :param database: AWS Glue Database name
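A hedged usage sketch, reusing session and df from above (names illustrative; passing database is what triggers the optional Glue metadata write):

session.pandas.to_csv(dataframe=df,
                      path="s3://bucket-name/folder_name/",
                      database="my_glue_database",  # optional Glue catalog registration
                      mode="overwrite")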
@@ -474,6 +481,7 @@ def to_parquet(self,
474481 """
475482 Write a Pandas Dataframe as parquet files on S3
476483 Optionally writes metadata on AWS Glue.
484+
477485 :param dataframe: Pandas Dataframe
478486 :param path: AWS S3 path (E.g. s3://bucket-name/folder_name/
479487 :param database: AWS Glue Database name
@@ -483,8 +491,7 @@ def to_parquet(self,
     :param mode: "append", "overwrite", "overwrite_partitions"
     :param procs_cpu_bound: Number of cores used for CPU bound tasks
     :param procs_io_bound: Number of cores used for I/O bound tasks
-    :param cast_columns: Dictionary of columns names and Arrow types to be casted.
-           E.g. {"col name": "int64", "col2 name": "int32"}
+    :param cast_columns: Dictionary of column names and Arrow types to be cast (E.g. {"col name": "int64", "col2 name": "int32"})
     :return: List of objects written on S3
     """
     return self.to_s3(dataframe=dataframe,
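A sketch of the parquet writer with the cast_columns mapping documented above (session, df, and all names illustrative, as before):

session.pandas.to_parquet(dataframe=df,
                          path="s3://bucket-name/folder_name/",
                          partition_cols=["year", "month"],
                          cast_columns={"col name": "int64", "col2 name": "int32"},
                          mode="append")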
@@ -514,6 +521,7 @@ def to_s3(self,
514521 """
515522 Write a Pandas Dataframe on S3
516523 Optionally writes metadata on AWS Glue.
524+
517525 :param dataframe: Pandas Dataframe
518526 :param path: AWS S3 path (E.g. s3://bucket-name/folder_name/
519527 :param file_format: "csv" or "parquet"
@@ -524,9 +532,7 @@ def to_s3(self,
     :param mode: "append", "overwrite", "overwrite_partitions"
     :param procs_cpu_bound: Number of cores used for CPU bound tasks
     :param procs_io_bound: Number of cores used for I/O bound tasks
-    :param cast_columns: Dictionary of columns indexes and Arrow types to be casted.
-           E.g. {2: "int64", 5: "int32"}
-           Only for "parquet" file_format
+    :param cast_columns: Dictionary of column indexes and Arrow types to be cast (E.g. {2: "int64", 5: "int32"}) (Only for "parquet" file_format)
     :return: List of objects written on S3
     """
     if not partition_cols:
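to_s3 is the generic entry point the two writers above delegate to; a sketch with the same illustrative session and df:

paths = session.pandas.to_s3(dataframe=df,
                             path="s3://bucket-name/folder_name/",
                             file_format="parquet",
                             partition_cols=["year"],
                             mode="overwrite_partitions")
print(paths)  # list of objects written on S3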
@@ -769,17 +775,16 @@ def to_redshift(
                 ):
     """
     Load Pandas Dataframe as a Table on Amazon Redshift
+
     :param dataframe: Pandas Dataframe
     :param path: S3 path to write temporary files (E.g. s3://BUCKET_NAME/ANY_NAME/)
     :param connection: A PEP 249 compatible connection (Can be generated with Redshift.generate_connection())
     :param schema: The Redshift Schema for the table
     :param table: The name of the desired Redshift table
     :param iam_role: AWS IAM role with the related permissions
-    :param diststyle: Redshift distribution styles. Must be in ["AUTO", "EVEN", "ALL", "KEY"]
-           https://docs.aws.amazon.com/redshift/latest/dg/t_Distributing_data.html
+    :param diststyle: Redshift distribution styles. Must be in ["AUTO", "EVEN", "ALL", "KEY"] (https://docs.aws.amazon.com/redshift/latest/dg/t_Distributing_data.html)
     :param distkey: Specifies a column name or positional number for the distribution key
-    :param sortstyle: Sorting can be "COMPOUND" or "INTERLEAVED"
-           https://docs.aws.amazon.com/redshift/latest/dg/t_Sorting_data.html
+    :param sortstyle: Sorting can be "COMPOUND" or "INTERLEAVED" (https://docs.aws.amazon.com/redshift/latest/dg/t_Sorting_data.html)
     :param sortkey: List of columns to be sorted
     :param preserve_index: Should we preserve the Dataframe index?
     :param mode: append or overwrite
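A hedged sketch of a load call, reusing session and df from the first sketch (every identifier below is illustrative; the connection is any PEP 249 object, e.g. one produced by the Redshift.generate_connection() helper the docstring mentions, whose signature is not shown here):

# con = a PEP 249 compatible connection, e.g. from Redshift.generate_connection()
session.pandas.to_redshift(dataframe=df,
                           path="s3://BUCKET_NAME/ANY_NAME/",
                           connection=con,
                           schema="public",
                           table="my_table",
                           iam_role="arn:aws:iam::123456789012:role/my-redshift-role",
                           diststyle="KEY",
                           distkey="customer_id",
                           sortstyle="COMPOUND",
                           sortkey=["event_date"],
                           mode="overwrite")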