@@ -1030,7 +1030,6 @@ def get_spark_conf(
10301030 aws_region : Optional [str ] = None ,
10311031 service_account_name : Optional [str ] = None ,
10321032 force_spark_resource_configs : bool = True ,
1033- spark_app_id : str = '' , # to be removed once verified all applications are not explicitly setting app id
10341033 ) -> Dict [str , str ]:
10351034 """Build spark config dict to run with spark on paasta
10361035
@@ -1098,19 +1097,12 @@ def get_spark_conf(
10981097 # in all places for metric systems:
10991098 # - since in the Prometheus metrics endpoint those will be converted to '_'
11001099 # - while the 'spark-app-selector' executor pod label will keep the original app id
1101- if len (spark_app_id ) == 0 :
1102- if is_jupyter :
1103- raw_app_id = app_name
1104- else :
1105- random_postfix = '' .join (random .choice (string .ascii_lowercase + string .digits ) for _ in range (4 ))
1106- raw_app_id = f'{ paasta_service } __{ paasta_instance } __{ random_postfix } '
1107- app_id = re .sub (r'[\.,-]' , '_' , _get_k8s_resource_name_limit_size_with_hash (raw_app_id ))
1100+ if is_jupyter :
1101+ raw_app_id = app_name
11081102 else :
1109- log .warning (
1110- 'We do not recommend users to set spark.app.id, as it could diminish the clarity of identification '
1111- 'and can potentially cause the monitoring dashboard to not work properly.' ,
1112- )
1113- app_id = spark_app_id
1103+ random_postfix = '' .join (random .choice (string .ascii_lowercase + string .digits ) for _ in range (4 ))
1104+ raw_app_id = f'{ paasta_service } __{ paasta_instance } __{ random_postfix } '
1105+ app_id = re .sub (r'[\.,-]' , '_' , _get_k8s_resource_name_limit_size_with_hash (raw_app_id ))
11141106
11151107 spark_conf .update ({
11161108 'spark.app.name' : app_name ,
0 commit comments