Commit 0c627cb

Merge pull request #60 from Yelp/u/srojas/SEC-13906-automatically-configure-temp-creds-provider-if-session-token-is-found
Automatically Configure TemporaryAWSCredentialsProvider if AWS Session Token Is Included in Session Credentials
2 parents 15ec932 + 47a08fc commit 0c627cb

File tree: 3 files changed (+39 / -4 lines)

service_configuration_lib/spark_config.py

Lines changed: 11 additions & 3 deletions
@@ -19,6 +19,7 @@
 from boto3 import Session

 AWS_CREDENTIALS_DIR = '/etc/boto_cfg/'
+AWS_TEMP_CREDENTIALS_PROVIDER = 'org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider'
 GPU_POOLS_YAML_FILE_PATH = '/nail/srv/configs/gpu_pools.yaml'
 DEFAULT_PAASTA_VOLUME_PATH = '/etc/paasta/volumes.json'
 DEFAULT_SPARK_MESOS_SECRET_FILE = '/nail/etc/paasta_spark_secret'

@@ -46,7 +47,7 @@
     'spark.executorEnv.SPARK_EXECUTOR_DIRS',
     'spark.hadoop.fs.s3a.access.key',
     'spark.hadoop.fs.s3a.secret.key',
-    'spark.hadoop.fs.s3a.session.token'
+    'spark.hadoop.fs.s3a.session.token',
     'spark.kubernetes.pyspark.pythonVersion',
     'spark.kubernetes.container.image',
     'spark.kubernetes.namespace',

@@ -504,10 +505,11 @@ def get_spark_conf(
     :param paasta_service: the service name of the job
     :param paasta_instance: the instance name of the job
     :param docker_img: the docker image used to launch container for spark executor.
-    :param aws_creds: the aws creds to be used for this spark job.
+    :param aws_creds: the aws creds to be used for this spark job. If a key triplet is passed,
+        we configure a different credentials provider to support this workflow.
     :param extra_volumes: extra files to mount on the spark executors
     :param extra_docker_params: extra docker parameters to launch the spark executor
-        cotnainer. This is only being used when `cluster_manager` is set to `mesos`
+        container. This is only being used when `cluster_manager` is set to `mesos`
     :param with_secret: whether the output spark config should include mesos secrets.
         This is only being used when `cluster_manager` is set to `mesos`
     :param needs_docker_cfg: whether we should add docker.cfg file for accessing

@@ -544,6 +546,12 @@ def get_spark_conf(

     spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}

+    # We automatically update the credentials provider if the session token is included.
+    # By default the SimpleAWSCredentials provider is used, which is incompatible with
+    # temporary credentials. More details in SEC-13906.
+    if aws_creds[2] is not None:
+        spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_TEMP_CREDENTIALS_PROVIDER
+
     spark_conf.update({
         'spark.app.name': app_name,
         'spark.ui.port': str(ui_port),
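For context on how the new branch gets exercised, here is a hypothetical caller sketch (not part of this repo): it resolves temporary credentials from a boto3 session and passes them as the aws_creds triplet, after which get_spark_conf sets the TemporaryAWSCredentialsProvider automatically. The keyword arguments mirror those used in the new test; all concrete values are placeholders.

from boto3 import Session

from service_configuration_lib import spark_config

# Resolve AWS credentials via boto3; for an assumed role or SSO session the
# token is non-None, so get_spark_conf() switches the s3a credentials
# provider to TemporaryAWSCredentialsProvider automatically.
creds = Session().get_credentials()
aws_creds = (creds.access_key, creds.secret_key, creds.token)

spark_conf = spark_config.get_spark_conf(
    cluster_manager='kubernetes',
    spark_app_base_name='example-app',              # placeholder values below
    user_spark_opts={'spark.driver.memory': '2g'},
    paasta_cluster='example-cluster',
    paasta_pool='example-pool',
    paasta_service='example-service',
    paasta_instance='example-instance',
    docker_img='example-registry/spark:latest',
    extra_volumes=[],
    aws_creds=aws_creds,
)

print(spark_conf.get('spark.hadoop.fs.s3a.aws.credentials.provider'))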

setup.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@

 setup(
     name='service-configuration-lib',
-    version='2.5.2',
+    version='2.5.3',
     provides=['service_configuration_lib'],
     description='Start, stop, and inspect Yelp SOA services',
     url='https://github.com/Yelp/service_configuration_lib',

tests/spark_config_test.py

Lines changed: 27 additions & 0 deletions
@@ -149,6 +149,7 @@ class TestGetSparkConf:
     spark_app_base_name = 'test_app_base_name'
     extra_volumes = [{'hostPath': '/tmp', 'containerPath': '/tmp', 'mode': 'RO'}]
     default_mesos_leader = 'mesos://some-url.yelp.com:5050'
+    aws_provider_key = 'spark.hadoop.fs.s3a.aws.credentials.provider'

     @pytest.fixture
     def mock_paasta_volumes(self, monkeypatch, tmpdir):

@@ -632,6 +633,32 @@ def verify(output):

         return verify

+    def test_get_spark_conf_aws_session(self):
+        other_spark_opts = {'spark.driver.memory': '2g', 'spark.executor.memoryOverhead': '1024'}
+        not_allowed_opts = {'spark.executorEnv.PAASTA_SERVICE': 'random-service'}
+        user_spark_opts = {
+            **({}),
+            **not_allowed_opts,
+            **other_spark_opts,
+        }
+
+        aws_creds = ('key', 'secret', 'token')
+
+        output = spark_config.get_spark_conf(
+            cluster_manager='kubernetes',
+            spark_app_base_name=self.spark_app_base_name,
+            user_spark_opts=user_spark_opts,
+            paasta_cluster=self.cluster,
+            paasta_pool=self.pool,
+            paasta_service=self.service,
+            paasta_instance=self.instance,
+            docker_img=self.docker_image,
+            extra_volumes=self.extra_volumes,
+            aws_creds=aws_creds,
+        )
+        assert self.aws_provider_key in output.keys()
+        assert 'org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider' == output[self.aws_provider_key]
+
     def test_get_spark_conf_mesos(
         self,
         user_spark_opts,
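A possible companion test, not part of this commit, would cover the opposite branch: when the triplet has no session token, the provider key should be left untouched. This assumes no other code path in get_spark_conf sets spark.hadoop.fs.s3a.aws.credentials.provider by default.

    # Hypothetical companion test (not in this commit): without a session
    # token, get_spark_conf() should leave the credentials provider alone,
    # assuming no other code path sets it by default.
    def test_get_spark_conf_aws_no_session_token(self):
        aws_creds = ('key', 'secret', None)

        output = spark_config.get_spark_conf(
            cluster_manager='kubernetes',
            spark_app_base_name=self.spark_app_base_name,
            user_spark_opts={},
            paasta_cluster=self.cluster,
            paasta_pool=self.pool,
            paasta_service=self.service,
            paasta_instance=self.instance,
            docker_img=self.docker_image,
            extra_volumes=self.extra_volumes,
            aws_creds=aws_creds,
        )
        assert self.aws_provider_key not in output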
