
Commit 5438d0c

Support explicitly setting spark app id (#126)
* Support explicitly setting spark app id
* Bump version
1 parent ebaf3bf commit 5438d0c

File tree

2 files changed: +17 -6 lines changed


service_configuration_lib/spark_config.py

Lines changed: 16 additions & 5 deletions
@@ -6,7 +6,9 @@
 import logging
 import math
 import os
+import random
 import re
+import string
 import time
 from typing import Any
 from typing import Dict
@@ -38,7 +40,6 @@

 NON_CONFIGURABLE_SPARK_OPTS = {
     'spark.master',
-    'spark.app.id',
     'spark.ui.port',
     'spark.mesos.principal',
     'spark.mesos.secret',
@@ -1029,6 +1030,7 @@ def get_spark_conf(
     aws_region: Optional[str] = None,
     service_account_name: Optional[str] = None,
     force_spark_resource_configs: bool = True,
+    spark_app_id: str = '',  # to be removed once verified all applications are not explicitly setting app id
 ) -> Dict[str, str]:
     """Build spark config dict to run with spark on paasta

@@ -1057,6 +1059,7 @@
         If not provided, it uses cert files at {K8S_AUTH_FOLDER} to authenticate.
     :param force_spark_resource_configs: skip the resource/instances recalculation.
         This is strongly not recommended.
+    :param spark_app_id: explicitly set spark.app.id
     :returns: spark opts in a dict.
     """
     # Mesos deprecation
@@ -1095,11 +1098,19 @@
     # in all places for metric systems:
     # - since in the Promehteus metrics endpoint those will be converted to '_'
     # - while the 'spark-app-selector' executor pod label will keep the original app id
-    if is_jupyter:
-        raw_app_id = app_name
+    if len(spark_app_id) == 0:
+        if is_jupyter:
+            raw_app_id = app_name
+        else:
+            random_postfix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(4))
+            raw_app_id = f'{paasta_service}__{paasta_instance}__{random_postfix}'
+        app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id))
     else:
-        raw_app_id = f'{paasta_service}__{paasta_instance}__{int(time.time()) % 10000}'
-    app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id))
+        log.warning(
+            'We do not recommend users to set spark.app.id, as it could diminish the clarity of identification '
+            'and can potentially cause the monitoring dashboard to not work properly.',
+        )
+        app_id = spark_app_id

     spark_conf.update({
         'spark.app.name': app_name,
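
One side effect worth calling out: 'spark.app.id' is removed from NON_CONFIGURABLE_SPARK_OPTS, the set of options the library refuses to accept from user-supplied spark opts. The code that applies that set is not shown in this commit, so the sketch below is only an illustration of how such a denylist is typically enforced; filter_user_spark_opts is a hypothetical helper name, not the library's API.

from typing import Dict

# Denylist as of this commit: 'spark.app.id' is no longer rejected.
NON_CONFIGURABLE_SPARK_OPTS = {
    'spark.master',
    'spark.ui.port',
    'spark.mesos.principal',
    'spark.mesos.secret',
}


def filter_user_spark_opts(user_opts: Dict[str, str]) -> Dict[str, str]:
    """Hypothetical helper: drop any option the library reserves for itself."""
    return {k: v for k, v in user_opts.items() if k not in NON_CONFIGURABLE_SPARK_OPTS}


if __name__ == '__main__':
    requested = {'spark.app.id': 'my_custom_id', 'spark.master': 'local[4]'}
    # 'spark.app.id' now survives filtering; 'spark.master' is still dropped.
    print(filter_user_spark_opts(requested))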

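When spark_app_id is left empty (the default), the app id is now derived from the service and instance plus a 4-character random postfix, then sanitized so '.', ',' and '-' become '_'. The standalone sketch below mirrors that default branch; _get_k8s_resource_name_limit_size_with_hash is not part of this diff, so it is stubbed here as a pass-through placeholder rather than the library's real implementation.

import random
import re
import string


def _get_k8s_resource_name_limit_size_with_hash(name: str) -> str:
    # Placeholder stub: the real helper (not shown in this commit) shortens
    # names that exceed Kubernetes limits and appends a hash suffix.
    return name


def default_app_id(paasta_service: str, paasta_instance: str) -> str:
    # Mirrors the new default branch: a 4-char alphanumeric postfix replaces
    # the old `int(time.time()) % 10000` suffix.
    random_postfix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(4))
    raw_app_id = f'{paasta_service}__{paasta_instance}__{random_postfix}'
    # '.', ',' and '-' are replaced with '_' so metric systems and the
    # 'spark-app-selector' pod label agree on one identifier.
    return re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id))


if __name__ == '__main__':
    # Prints something like 'my_service__main_batch__a1b2'
    print(default_app_id('my-service', 'main.batch'))

Passing a non-empty spark_app_id skips this generation entirely; the value is used as-is after a log.warning that custom ids can confuse dashboards keyed on the generated format.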
setup.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@

 setup(
     name='service-configuration-lib',
-    version='2.18.5',
+    version='2.18.6',
     provides=['service_configuration_lib'],
     description='Start, stop, and inspect Yelp SOA services',
     url='https://github.com/Yelp/service_configuration_lib',
