19 | 19 | from boto3 import Session
20 | 20 |
21 | 21 | AWS_CREDENTIALS_DIR = '/etc/boto_cfg/'
 | 22 | +AWS_TEMP_CREDENTIALS_PROVIDER = 'org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider'
22 | 23 | GPU_POOLS_YAML_FILE_PATH = '/nail/srv/configs/gpu_pools.yaml'
23 | 24 | DEFAULT_PAASTA_VOLUME_PATH = '/etc/paasta/volumes.json'
24 | 25 | DEFAULT_SPARK_MESOS_SECRET_FILE = '/nail/etc/paasta_spark_secret'
46 | 47 | 'spark.executorEnv.SPARK_EXECUTOR_DIRS', |
47 | 48 | 'spark.hadoop.fs.s3a.access.key', |
48 | 49 | 'spark.hadoop.fs.s3a.secret.key', |
49 | | - 'spark.hadoop.fs.s3a.session.token' |
| 50 | + 'spark.hadoop.fs.s3a.session.token', |
50 | 51 | 'spark.kubernetes.pyspark.pythonVersion', |
51 | 52 | 'spark.kubernetes.container.image', |
52 | 53 | 'spark.kubernetes.namespace', |
@@ -504,10 +505,11 @@ def get_spark_conf( |
504 | 505 | :param paasta_service: the service name of the job |
505 | 506 | :param paasta_instance: the instance name of the job |
506 | 507 | :param docker_img: the docker image used to launch container for spark executor. |
507 | | - :param aws_creds: the aws creds to be used for this spark job. |
| 508 | + :param aws_creds: the aws creds to be used for this spark job. If the triplet includes a
| 509 | + session token, a temporary credentials provider is configured to support that workflow.
508 | 510 | :param extra_volumes: extra files to mount on the spark executors |
509 | 511 | :param extra_docker_params: extra docker parameters to launch the spark executor |
510 | | - cotnainer. This is only being used when `cluster_manager` is set to `mesos` |
| 512 | + container. This is only being used when `cluster_manager` is set to `mesos` |
511 | 513 | :param with_secret: whether the output spark config should include mesos secrets. |
512 | 514 | This is only being used when `cluster_manager` is set to `mesos` |
513 | 515 | :param needs_docker_cfg: whether we should add docker.cfg file for accessing |
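For context on the aws_creds triplet described in the docstring above, here is a minimal, hedged sketch of how such a triplet might be assembled with boto3 (which this module already imports). The helper name, the profile_name parameter, and the (access key, secret key, session token) ordering are assumptions for illustration only; the relevant point is that the third element is None for long-lived keys and set for temporary (STS / assumed-role) credentials, which is what drives the provider switch further down.

```python
from boto3 import Session

def load_aws_creds_triplet(profile_name=None):
    # Illustrative only: resolve credentials through boto3's normal chain
    # (env vars, shared credentials file, instance profile, ...).
    creds = Session(profile_name=profile_name).get_credentials()
    if creds is None:
        raise RuntimeError('no AWS credentials could be resolved')
    frozen = creds.get_frozen_credentials()
    # frozen.token is None for long-lived IAM keys and non-None for
    # temporary (STS / assumed-role) credentials.
    return (frozen.access_key, frozen.secret_key, frozen.token)
```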
@@ -544,6 +546,12 @@ def get_spark_conf( |
544 | 546
545 | 547 | spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}
546 | 548
| 549 | + # We automatically update the credentials provider if the session token is included. |
| 550 | + # By default the SimpleAWSCredentialsProvider is used, which is incompatible with
| 551 | + # temporary credentials. More details in SEC-13906. |
| 552 | + if aws_creds[2] is not None: |
| 553 | + spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_TEMP_CREDENTIALS_PROVIDER |
| 554 | + |
547 | 555 | spark_conf.update({ |
548 | 556 | 'spark.app.name': app_name, |
549 | 557 | 'spark.ui.port': str(ui_port), |
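Putting the new pieces together, a self-contained sketch of the intended s3a behaviour follows. The constant and the Hadoop property names are copied from the diff; the helper itself, and the assumption that access.key, secret.key, and session.token are populated from the same triplet elsewhere in this module, are hypothetical and for illustration only.

```python
AWS_TEMP_CREDENTIALS_PROVIDER = 'org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider'

def apply_s3a_credentials(spark_conf, aws_creds):
    # Illustrative helper mirroring the change above.
    access_key, secret_key, session_token = aws_creds
    spark_conf['spark.hadoop.fs.s3a.access.key'] = access_key
    spark_conf['spark.hadoop.fs.s3a.secret.key'] = secret_key
    if session_token is not None:
        spark_conf['spark.hadoop.fs.s3a.session.token'] = session_token
        # SimpleAWSCredentialsProvider only reads the access/secret pair, so a
        # session token would be silently ignored; temporary credentials need
        # the TemporaryAWSCredentialsProvider instead.
        spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_TEMP_CREDENTIALS_PROVIDER
    return spark_conf
```

For example, apply_s3a_credentials({}, ('AKIA...', 'secret', None)) leaves the provider untouched, while passing a non-None third element switches it to the temporary-credentials provider, matching the new conditional in this hunk.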