88from functools import lru_cache
99from glob import glob
1010import urllib .parse
11+ import re
1112
1213import pandas as pd
1314from jinja2 import Environment , FileSystemLoader
1415import requests
1516from clickhouse_driver import Client
1617import boto3
1718from botocore .exceptions import NoCredentialsError
19+ import yaml
20+
1821
1922DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
2023DATABASE_USER_VAR = "CLICKHOUSE_TEST_STAT_LOGIN"
@@ -166,6 +169,63 @@ def get_checks_fails(client: Client, commit_sha: str, branch_name: str):
166169 return client .query_dataframe (query )
167170
168171
def get_broken_tests_rules(broken_tests_file_path):
    """
    Load the broken-tests YAML file and index its rules for fast lookup.

    Each entry of the file must provide ``name``, ``reason`` and ``message``;
    ``not_message``, ``check_types`` and ``regex`` are optional. An entry is
    treated as a regex rule only when ``regex`` is literally ``true``; in that
    case its ``name``, ``message`` and ``not_message`` are compiled with
    ``re.compile``.

    Returns a dict with two keys:
      - ``"exact"``: maps a literal test name to its rule dict.
      - ``"pattern"``: maps a compiled test-name regex to its rule dict.

    An empty file yields a result with no rules in either mapping.

    Raises ``KeyError`` if an entry lacks ``name``, ``reason`` or ``message``,
    and ``re.error`` if a regex rule contains an invalid pattern.
    """
    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
        # safe_load returns None for an empty document; iterate over nothing
        # instead of failing with "NoneType is not iterable".
        broken_tests = yaml.safe_load(broken_tests_file) or []

    compiled_rules = {"exact": {}, "pattern": {}}

    for test in broken_tests:
        # Only an explicit boolean true enables regex mode (not "yes"/1/etc.
        # coming through as other truthy values).
        regex = test.get("regex") is True
        rule = {
            "reason": test["reason"],
            "message": re.compile(test["message"]) if regex else test["message"],
        }

        if test.get("not_message"):
            rule["not_message"] = (
                re.compile(test["not_message"]) if regex else test["not_message"]
            )
        if test.get("check_types"):
            rule["check_types"] = test["check_types"]

        if regex:
            rule["regex"] = True
            compiled_rules["pattern"][re.compile(test["name"])] = rule
        else:
            compiled_rules["exact"][test["name"]] = rule

    return compiled_rules
199+
200+
def _rule_applies_to_check(rule_data: dict, check_name: str) -> bool:
    """Return True when the rule is applicable to the check named *check_name*."""
    check_types = rule_data.get("check_types")
    if not check_types:
        # "check_types" is optional in a rule; without it the rule is not
        # restricted to particular checks, so it applies everywhere.
        return True
    return any(check_type in check_name for check_type in check_types)


def get_known_fail_reason(test_name: str, check_name: str, known_fails: dict) -> str:
    """
    Returns the reason why a test is known to fail based on its name and build context.

    - Exact-name rules are checked first.
    - Pattern-name rules are checked next (first match wins).
    - A rule with ``check_types`` applies only when one of those strings is a
      substring of ``check_name``; a rule without ``check_types`` applies to
      every check.
    - Message/not_message conditions are ignored.

    ``known_fails`` must contain an ``"exact"`` dict keyed by test name and a
    ``"pattern"`` dict keyed by compiled regex, each mapping to a rule dict
    with at least a ``"reason"`` entry.

    Returns the matching rule's reason, or ``"No reason given"`` when no rule
    applies.
    """
    # 1. Exact-name rules
    exact_rule = known_fails["exact"].get(test_name)
    if exact_rule and _rule_applies_to_check(exact_rule, check_name):
        return exact_rule["reason"]

    # 2. Pattern-name rules (the whole test name must match the pattern)
    for name_re, rule_data in known_fails["pattern"].items():
        if name_re.fullmatch(test_name) and _rule_applies_to_check(
            rule_data, check_name
        ):
            return rule_data["reason"]

    return "No reason given"
227+
228+
169229def get_checks_known_fails (
170230 client : Client , commit_sha : str , branch_name : str , known_fails : dict
171231):
@@ -189,7 +249,6 @@ def get_checks_known_fails(
189249 GROUP BY check_name, test_name, report_url, task_url
190250 )
191251 WHERE test_status='BROKEN'
192- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
193252 ORDER BY job_name, test_name
194253 """
195254
@@ -198,10 +257,11 @@ def get_checks_known_fails(
198257 df .insert (
199258 len (df .columns ) - 1 ,
200259 "reason" ,
201- df ["test_name" ]
202- .astype (str )
203- .apply (
204- lambda test_name : known_fails [test_name ].get ("reason" , "No reason given" )
260+ df .apply (
261+ lambda row : get_known_fail_reason (
262+ row ["test_name" ], row ["check_name" ], known_fails
263+ ),
264+ axis = 1 ,
205265 ),
206266 )
207267
@@ -654,7 +714,7 @@ def create_workflow_report(
654714 pr_number : int = None ,
655715 commit_sha : str = None ,
656716 no_upload : bool = False ,
657- known_fails : str = None ,
717+ known_fails_file_path : str = None ,
658718 check_cves : bool = False ,
659719 mark_preview : bool = False ,
660720) -> str :
@@ -710,15 +770,12 @@ def create_workflow_report(
710770 # This might occur when run in preview mode.
711771 cves_not_checked = not check_cves or fail_results ["docker_images_cves" ] is ...
712772
713- if known_fails :
714- if not os .path .exists (known_fails ):
715- print (f"Known fails file { known_fails } not found." )
716- exit (1 )
717-
718- with open (known_fails ) as f :
719- known_fails = json .load (f )
773+ if known_fails_file_path :
774+ if not os .path .exists (known_fails_file_path ):
775+ print (f"WARNING:Known fails file { known_fails_file_path } not found." )
776+ else :
777+ known_fails = get_broken_tests_rules (known_fails_file_path )
720778
721- if known_fails :
722779 fail_results ["checks_known_fails" ] = get_checks_known_fails (
723780 db_client , commit_sha , branch_name , known_fails
724781 )
0 commit comments