Skip to content

Commit 457b5d1

Browse files
authored
Setting spark.app.id to a more intuitive name (#124)
* Setting spark.app.id to a more intuitive name * Update tests * Replace more special characters
1 parent d62f4fe commit 457b5d1

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

service_configuration_lib/spark_config.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import math
88
import os
9+
import re
910
import time
1011
from typing import Any
1112
from typing import Dict
@@ -37,6 +38,7 @@
3738

3839
NON_CONFIGURABLE_SPARK_OPTS = {
3940
'spark.master',
41+
'spark.app.id',
4042
'spark.ui.port',
4143
'spark.mesos.principal',
4244
'spark.mesos.secret',
@@ -1076,20 +1078,27 @@ def get_spark_conf(
10761078
_pick_random_port(PREFERRED_SPARK_UI_PORT),
10771079
)
10781080

1081+
spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}
1082+
1083+
if aws_creds[2] is not None:
1084+
spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER
1085+
10791086
# app_name from env is already appended port and time to make it unique
10801087
app_name = (spark_opts_from_env or {}).get('spark.app.name')
10811088
if not app_name:
10821089
# We want to make the app name more unique so that we can search it
10831090
# from history server.
10841091
app_name = f'{app_base_name}_{ui_port}_{int(time.time())}'
10851092

1086-
spark_conf = {**(spark_opts_from_env or {}), **_filter_user_spark_opts(user_spark_opts)}
1087-
1088-
if aws_creds[2] is not None:
1089-
spark_conf['spark.hadoop.fs.s3a.aws.credentials.provider'] = AWS_ENV_CREDENTIALS_PROVIDER
1093+
# Explicitly setting app id: replace special characters with '_' to make it consistent
1094+
# in all places for metric systems:
1095+
# - since in the Prometheus metrics endpoint those will be converted to '_'
1096+
# - while the 'spark-app-selector' executor pod label will keep the original app id
1097+
app_id = re.sub(r'[\.,-]', '_', app_name)
10901098

10911099
spark_conf.update({
10921100
'spark.app.name': app_name,
1101+
'spark.app.id': app_id,
10931102
'spark.ui.port': str(ui_port),
10941103
})
10951104

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
setup(
1919
name='service-configuration-lib',
20-
version='2.18.3',
20+
version='2.18.4',
2121
provides=['service_configuration_lib'],
2222
description='Start, stop, and inspect Yelp SOA services',
2323
url='https://github.com/Yelp/service_configuration_lib',

tests/spark_config_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import itertools
33
import json
44
import os
5+
import re
56
import sys
67
from unittest import mock
78

@@ -1138,6 +1139,14 @@ def verify(output):
11381139
return [key]
11391140
return verify
11401141

1142+
@pytest.fixture
1143+
def assert_app_id(self):
1144+
def verify(output):
1145+
key = 'spark.app.id'
1146+
assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name'])
1147+
return [key]
1148+
return verify
1149+
11411150
@pytest.fixture
11421151
def assert_mesos_conf(self):
11431152
def verify(output):
@@ -1230,6 +1239,7 @@ def test_leaders_get_spark_conf_kubernetes(
12301239
mock_time,
12311240
assert_ui_port,
12321241
assert_app_name,
1242+
assert_app_id,
12331243
assert_kubernetes_conf,
12341244
mock_log,
12351245
):
@@ -1262,6 +1272,7 @@ def test_leaders_get_spark_conf_kubernetes(
12621272
verified_keys = set(
12631273
assert_ui_port(output) +
12641274
assert_app_name(output) +
1275+
assert_app_id(output) +
12651276
assert_kubernetes_conf(output) +
12661277
list(other_spark_opts.keys()) +
12671278
list(mock_adjust_spark_requested_resources_kubernetes.return_value.keys()) +
@@ -1321,6 +1332,7 @@ def test_show_console_progress_jupyter(
13211332
mock_time,
13221333
assert_ui_port,
13231334
assert_app_name,
1335+
assert_app_id,
13241336
assert_local_conf,
13251337
mock_log,
13261338
):
@@ -1361,6 +1373,7 @@ def test_local_spark(
13611373
mock_time,
13621374
assert_ui_port,
13631375
assert_app_name,
1376+
assert_app_id,
13641377
assert_local_conf,
13651378
mock_log,
13661379
):
@@ -1385,6 +1398,7 @@ def test_local_spark(
13851398
verified_keys = set(
13861399
assert_ui_port(output) +
13871400
assert_app_name(output) +
1401+
assert_app_id(output) +
13881402
assert_local_conf(output) +
13891403
list(mock_append_spark_prometheus_conf.return_value.keys()) +
13901404
list(mock_append_event_log_conf.return_value.keys()) +

0 commit comments

Comments
 (0)