Skip to content

Commit ebaf3bf

Browse files
authored
Update spark app id naming convention and limit length (#125)
* Update spark app id naming convention and limit length
* Double underscore and mod timestamp
1 parent 457b5d1 commit ebaf3bf

File tree

3 files changed

+21
-4
lines changed

3 files changed

+21
-4
lines changed

service_configuration_lib/spark_config.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1089,12 +1089,17 @@ def get_spark_conf(
10891089
# We want to make the app name more unique so that we can search it
10901090
# from history server.
10911091
app_name = f'{app_base_name}_{ui_port}_{int(time.time())}'
1092+
is_jupyter = _is_jupyterhub_job(app_name)
10921093

10931094
# Explicitly setting app id: replace special characters with '_' to make it consistent
10941095
# in all places for metric systems:
10951096
# - since in the Prometheus metrics endpoint those will be converted to '_'
10961097
# - while the 'spark-app-selector' executor pod label will keep the original app id
1097-
app_id = re.sub(r'[\.,-]', '_', app_name)
1098+
if is_jupyter:
1099+
raw_app_id = app_name
1100+
else:
1101+
raw_app_id = f'{paasta_service}__{paasta_instance}__{int(time.time()) % 10000}'
1102+
app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id))
10981103

10991104
spark_conf.update({
11001105
'spark.app.name': app_name,
@@ -1149,7 +1154,7 @@ def get_spark_conf(
11491154
self.update_spark_srv_configs(spark_conf)
11501155

11511156
# configure spark Console Progress
1152-
if _is_jupyterhub_job(spark_conf.get('spark.app.name', '')):
1157+
if is_jupyter:
11531158
spark_conf = _append_spark_config(spark_conf, 'spark.ui.showConsoleProgress', 'true')
11541159

11551160
spark_conf = _append_aws_credentials_conf(spark_conf, *aws_creds, aws_region)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
setup(
1919
name='service-configuration-lib',
20-
version='2.18.4',
20+
version='2.18.5',
2121
provides=['service_configuration_lib'],
2222
description='Start, stop, and inspect Yelp SOA services',
2323
url='https://github.com/Yelp/service_configuration_lib',

tests/spark_config_test.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1143,7 +1143,19 @@ def verify(output):
11431143
def assert_app_id(self):
11441144
def verify(output):
11451145
key = 'spark.app.id'
1146-
assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name'])
1146+
app_name = output['spark.app.name']
1147+
is_jupyter = 'jupyterhub' in app_name
1148+
paasta_service = output['spark.executorEnv.PAASTA_SERVICE']
1149+
paasta_instance = output['spark.executorEnv.PAASTA_INSTANCE']
1150+
1151+
if is_jupyter:
1152+
raw_app_id_prefix = app_name
1153+
else:
1154+
raw_app_id_prefix = f'{paasta_service}__{paasta_instance}__'
1155+
app_id_prefix = re.sub(r'[\.,-]', '_', raw_app_id_prefix)
1156+
output_app_id = output[key]
1157+
assert output_app_id.startswith(app_id_prefix)
1158+
assert len(output_app_id) <= 63
11471159
return [key]
11481160
return verify
11491161

0 commit comments

Comments
 (0)