
Commit 7f0e704

Improving spark-to-redshift stress test
1 parent 7f0b4b6

2 files changed: 6 additions, 3 deletions

awswrangler/redshift.py

Lines changed: 1 addition & 0 deletions

@@ -137,6 +137,7 @@ def write_load_manifest(self, manifest_path: str, objects_paths: List[str], proc
         client_s3 = self._session.boto3_session.client(service_name="s3", config=self._session.botocore_config)
         bucket: str
         bucket, path = manifest_path.replace("s3://", "").split("/", 1)
+        logger.info(f"payload: {payload}")
         client_s3.put_object(Body=payload, Bucket=bucket, Key=path)
         return manifest
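For context, the only change in this file logs the manifest payload immediately before it is uploaded to S3. Below is a minimal, self-contained sketch of that upload pattern; the manifest layout follows the documented Redshift COPY manifest format, and the function name build_and_upload_manifest plus the standalone boto3 client are illustrative assumptions, not awswrangler's actual API.

import json
import logging
from typing import List

import boto3

logger = logging.getLogger(__name__)


def build_and_upload_manifest(manifest_path: str, objects_paths: List[str]) -> dict:
    # Illustrative sketch only: build a Redshift COPY manifest and upload it to S3.
    # The JSON layout ({"entries": [{"url": ..., "mandatory": ...}]}) is the documented
    # Redshift COPY manifest format; one entry per staged data file.
    manifest = {"entries": [{"url": url, "mandatory": True} for url in objects_paths]}
    payload = json.dumps(manifest)

    # Split "s3://bucket/key" into bucket and key, mirroring the patched code.
    bucket, key = manifest_path.replace("s3://", "").split("/", 1)

    # The commit adds this log line right before the upload.
    logger.info(f"payload: {payload}")

    boto3.client("s3").put_object(Body=payload, Bucket=bucket, Key=key)
    return manifest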

testing/test_awswrangler/test_redshift.py

Lines changed: 5 additions & 3 deletions

@@ -287,9 +287,10 @@ def test_stress_to_redshift_spark_big(session, bucket, redshift_parameters):
         "B": list(range(1_000_000)),
         "C": list(range(1_000_000))
     }))
+    dataframe.cache()
 
     for i in range(10):
-        print(i)
+        print(f"Run number: {i}")
         con = Redshift.generate_connection(
             database="test",
             host=redshift_parameters.get("RedshiftAddress"),
@@ -299,15 +300,16 @@ def test_stress_to_redshift_spark_big(session, bucket, redshift_parameters):
         )
         session.spark.to_redshift(
             dataframe=dataframe,
-            path=f"s3://{bucket}/redshift-load/",
+            path=f"s3://{bucket}/redshift-load-{i}/",
             connection=con,
             schema="public",
             table="test",
             iam_role=redshift_parameters.get("RedshiftRole"),
             mode="overwrite",
-            min_num_partitions=4,
+            min_num_partitions=16,
         )
         con.close()
+    dataframe.unpersist()
 
 
 @pytest.mark.parametrize(
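The test now caches the 1,000,000-row DataFrame once before the ten load runs and unpersists it after the loop, so Spark does not rebuild it on every iteration, and each run writes to its own staging prefix. A minimal standalone sketch of that cache/unpersist lifecycle follows, using plain PySpark with a local Parquet write as a stand-in for the awswrangler session.spark.to_redshift(...) call.

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()

dataframe = spark.createDataFrame(pd.DataFrame({
    "A": list(range(1_000_000)),
    "B": list(range(1_000_000)),
    "C": list(range(1_000_000)),
}))
dataframe.cache()  # mark for caching; materialized on the first action, reused afterwards

for i in range(10):
    print(f"Run number: {i}")
    # Stand-in for session.spark.to_redshift(...): each run gets its own target path,
    # mirroring the per-run S3 prefix (f"s3://{bucket}/redshift-load-{i}/") in the test.
    dataframe.write.mode("overwrite").parquet(f"/tmp/redshift-load-{i}/")

dataframe.unpersist()  # release the cached blocks once all runs are done
spark.stop()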
