diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 8cd490bc8..3003b1b72 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -1,22 +1,46 @@
 import argparse
 import sys
 import os
+import re
 from junitparser import JUnitXml, Error, Failure, Skipped
 
-parser = argparse.ArgumentParser()
-parser.add_argument('junitxml', nargs='+')
+parser = argparse.ArgumentParser(description='Test results analyzer')
+parser.add_argument('input_files', nargs='+', help='JUnit XML files or log files')
 args = parser.parse_args()
 
 failures = []
-suites = []
+summaries = []
+
+error_types = [
+    "RuntimeError",
+    "ValueError",
+    "TypeError",
+    "AttributeError",
+    "KeyError",
+    "IndexError",
+    "ImportError",
+    "AssertionError",
+    "OSError",
+    "Failed",
+    "TimeoutError",
+    "asyncio.TimeoutError",
+    "FileNotFoundError",
+    "PermissionError",
+    "NotImplementedError",
+]
 
 def get_classname(case):
-    return ' '.join(case.classname.split())
+    return ' '.join(case.classname.split()) if hasattr(case, 'classname') else case.get('classname', '')
 
 def get_name(case):
+    if isinstance(case, dict):
+        return case.get('name', '')
     return ' '.join(case.name.split())
 
 def get_result(case):
+    if isinstance(case, dict):
+        return case.get('status', 'failed')
+
     result = "passed"
     if case.result:
         if isinstance(case.result[0], Error):
@@ -28,88 +52,215 @@ def get_result(case):
     return result
 
 def get_message(case):
+    if isinstance(case, dict):
+        return case.get('error', '')
+
     if not case.result:
         return ""
-    return f"{case.result[0].message.splitlines()[0]}"
+    full_text = case.result[0].text if hasattr(case.result[0], 'text') else case.result[0].message
+    if not full_text:
+        return ""
+
+    error_messages = []
+    capture_next_lines = False
+    indent_level = 0
 
-def print_md_row(row, print_header):
+    for line in full_text.splitlines():
+        stripped_line = line.strip()
+        if not stripped_line:
+            continue
+
+        for error_type in error_types:
+            if stripped_line.startswith(error_type + ": "):
+                error_msg = stripped_line[len(error_type)+2:]
+                error_messages.append(f"{error_type}: {error_msg}")
+                capture_next_lines = True
+                indent_level = 0
+                break
+            elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line:
+                error_msg = stripped_line.split(f'{error_type}:')[-1].strip()
+                error_messages.append(f"{error_type}: {error_msg}")
+                capture_next_lines = True
+                indent_level = 0
+                break
+
+    return " ; ".join(error_messages) if error_messages else f"{case.result[0].message.splitlines()[0]}"
+
+def print_md_row(row, print_header=False, fail_list=None):
     if print_header:
-        header = " | ".join([f"{key}" for key, _ in row.items()])
+        header = " | ".join([f"{key}" for key in row.keys()])
         print(f"| {header} |")
-        header = " | ".join(["-"*len(key) for key, _ in row.items()])
+        header = " | ".join(["---"] * len(row))
         print(f"| {header} |")
-    row = " | ".join([f"{value}" for _, value in row.items()])
-    print(f"| {row} |")
+    row_values = " | ".join([f"{value}" for value in row.values()])
+    print(f"| {row_values} |")
 
-def print_cases(cases):
-    print_header = True
-    for case in cases:
-        classname = get_classname(case)
-        name = get_name(case)
-        result = get_result(case)
-        message = get_message(case)
-        row = {
-            'Class name': classname,
-            'Test name': name,
-            'Status': result,
-            'Message': message,
-        }
-        print_md_row(row, print_header)
-        print_header = False
+    if fail_list is not None:
+        fail_list.write(f"| {row_values} |\n")
+
+
+def print_failures():
+    if not failures:
+        return
+
+    with open("ut_failure_list.csv", "w") as fail_list:
+        fail_list.write("sep='|'\n")
+
+        print("### Test Failures")
+        print_header = True
+        for case in failures:
+            print_md_row({
+                'Class name': get_classname(case),
+                'Test name': get_name(case),
+                'Status': get_result(case),
+                'Message': get_message(case),
+                'Source': case['source'] if isinstance(case, dict) else 'XML'
+            }, print_header, fail_list)
+            print_header = False
+
+def parse_log_file(log_file):
+    with open(log_file, encoding='utf-8') as f:
+        content = f.read()
+
+    ut_name = os.path.splitext(os.path.basename(log_file))[0]
+    summary = {
+        'Category': determine_category(ut_name),
+        'UT': ut_name,
+        'Test cases': 0,
+        'Passed': 0,
+        'Skipped': 0,
+        'Failures': 0,
+        'Errors': 0,
+        'Source': 'Log'
+    }
+
+    # Extract test counts
+    test_run_match = re.search(r"Ran (\d+) tests in [\d.]+s", content)
+    if test_run_match:
+        summary['Test cases'] = int(test_run_match.group(1))
 
-def print_suite(suite):
+    # Extract skipped case number
+    skipped_match = re.search(r"skipped[ =](\d+)", content, re.IGNORECASE)
+    if skipped_match:
+        summary['Skipped'] = int(skipped_match.group(1))
+    else:
+        skipped_match = re.search(r"skipped (\d+) cases?", content, re.IGNORECASE)
+        if skipped_match:
+            summary['Skipped'] = int(skipped_match.group(1))
+
+    # Extract failures
+    failure_blocks = re.findall(r"(FAIL:.*?)(?:\n\n|\n=+\n|\Z)", content, re.DOTALL)
+    exist_test_names = set()
+    failures_number = 0
+
+    for block in failure_blocks:
+        case_match = re.match(r"FAIL: (\w+) \(__mp_main__\.(\w+)\)", block)
+        if not case_match:
+            continue
+
+        test_name = case_match.group(1)
+        if test_name in exist_test_names:
+            continue
+        exist_test_names.add(test_name)
+
+        error_msg = []
+        error_pattern = r"(" + "|".join(error_types) + r"):.*?(?=\n\S|\n\n|\n=+\n|\Z)"
+        error_matches = list(re.finditer(error_pattern, block, re.DOTALL))
+        if not error_matches and "Traceback" in block:
+            error_msg.append("Unknown error (see traceback)")
+        else:
+            for match in error_matches:
+                error_msg.append(match.group(0).strip())
+
+        failures.append({
+            'classname': ut_name,
+            'name': f"{case_match.group(2)}:{test_name}",
+            'error': " ".join(error_msg),
+            'status': 'failed',
+            'source': 'Log'
+        })
+        failures_number += 1
+
+    if failures_number > summary['Failures']:
+        summary['Failures'] = failures_number
+        summary['Passed'] = summary['Test cases'] - summary['Failures'] - summary['Skipped']
+
+    return summary
+
+def determine_category(ut):
+    if ut == 'op_regression':
+        return 'op_regression'
+    elif ut == 'op_regression_dev1':
+        return 'op_regression_dev1'
+    elif ut == 'op_extended':
+        return 'op_extended'
+    elif 'op_ut' in ut:
+        return 'op_ut'
+    else:
+        return 'unknown'
+
+def process_log_file(log_file):
+    try:
+        summary = parse_log_file(log_file)
+        summaries.append(summary)
+    except Exception as e:
+        print(f"Error processing {log_file}: {e}", file=sys.stderr)
+
+def process_xml_file(xml_file):
+    try:
+        xml = JUnitXml.fromfile(xml_file)
+        ut = os.path.basename(xml_file).split('.')[0]
+        category = determine_category(ut)
+
+        for suite in xml:
+            suite_summary = {
+                'Category': category,
+                'UT': ut,
+                'Test cases': suite.tests,
+                'Passed': suite.tests - suite.skipped - suite.failures - suite.errors,
+                'Skipped': suite.skipped,
+                'Failures': suite.failures,
+                'Errors': suite.errors,
+                'Source': 'XML'
+            }
+            summaries.append(suite_summary)
+
+            for case in suite:
+                if get_result(case) not in ["passed", "skipped"]:
+                    failures.append(case)
+    except Exception as e:
+        print(f"Error processing {xml_file}: {e}", file=sys.stderr)
+
+def print_summary():
print("### Results Summary") print_header = True - for suite in suites: - ut = args.junitxml[0] - del(args.junitxml[0]) - ut = os.path.basename(ut).split('.')[0] - tests = suite.tests - skipped = suite.skipped - failures = suite.failures - errors = suite.errors - if ut == 'op_regression': - category = 'op_regression' - elif ut == 'op_regression_dev1': - category = 'op_regression_dev1' - elif ut == 'op_extended': - category = 'op_extended' - elif 'op_ut' in ut: - category = 'op_ut' - row = { - 'Category': category, - 'UT': ut, - 'Test cases': tests, - 'Passed': tests-skipped-failures-errors, - 'Skipped': skipped, - 'Failures': failures, - 'Errors': errors, - } - print_md_row(row, print_header) + + for summary in summaries: + print_md_row({ + 'Category': summary['Category'], + 'UT': summary['UT'], + 'Test cases': summary['Test cases'], + 'Passed': summary['Passed'], + 'Skipped': summary['Skipped'], + 'Failures': summary['Failures'], + 'Errors': summary['Errors'], + 'Source': summary['Source'] + }, print_header) print_header = False -xmls = [ JUnitXml.fromfile(f) for f in args.junitxml ] -for idx, xml in enumerate(xmls): - for suite in xml: - suites.append(suite) - for case in suite: - classname = get_classname(case) - name = get_name(case) - result = get_result(case) - if result not in ["passed", "skipped"]: - failures.append(case) - -printed = False -def print_break(needed): - if needed: - print("") - -if failures: - print_break(printed) - print("### Failures") - print_cases(failures) - printed = True - -print("### Results Summary") -print_suite(suites) - -sys.exit(0) +def main(): + for input_file in args.input_files: + if input_file.endswith('.log'): + process_log_file(input_file) + elif input_file.endswith('.xml'): + process_xml_file(input_file) + else: + print(f"Skipping unknown file type: {input_file}", file=sys.stderr) + + print_failures() + print_summary() + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh index 3fb1a1997..eb62a2da0 100644 --- a/.github/scripts/ut_result_check.sh +++ b/.github/scripts/ut_result_check.sh @@ -1,10 +1,55 @@ #!/bin/bash ut_suite="${1:-op_regression}" # op_regression / op_extended / op_ut / torch_xpu +# usage +# compare_and_filter_logs [output.log] + +compare_and_filter_logs() { + local file_UT="$1" + local file_known_issue="$2" + local output_file="${3:-${file_UT%.*}_filtered.log}" + local filtered_content="${file_UT%.*}_removed.log" + + if [[ $# -lt 2 ]]; then + echo "[ERROR] Need 2 files to compare" + return 1 + fi + + # Check whether UT's failed log contains the case of the known issue'log + echo "Checking whether $file_UT contains $file_known_issue" + if grep -qFf "$file_known_issue" "$file_UT"; then + echo "$file_UT contains $file_known_issue" + else + echo "$file_UT does not contain $file_known_issue" + return 1 + fi + + # Filter the same content from file_UT as file_known_issue + echo "Filtering $file_known_issue for $file_UT" + grep -vFf "$file_known_issue" "$file_UT" > "$output_file" + + # Keep the filtered UT cases + grep -nFf "$file_known_issue" "$file_UT" > "$filtered_content" + echo "Filtered cases file: $filtered_content" + if [[ -s "$filtered_content" ]]; then + echo -e "\n\033[1;31m[Filtered Cases]\033[0m" + awk -F':' '{ + line_number = $1 + $1 = "" + gsub(/^ /, "", $0) + printf "\033[33m%3d\033[0m: %s\n", line_number, $0 + }' "$filtered_content" + else + echo -e "\n\033[1;32mNo Filtered Cases\033[0m" + fi +} + if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" 
-    grep -E "^FAILED|have failures" "${ut_suite}"_test.log | awk '{print $2}' > ./"${ut_suite}"_failed.log
+    grep -E "^FAILED" "${ut_suite}"_test.log | awk '{print $2}' > ./"${ut_suite}"_failed.log
+    grep -E "have failures" "${ut_suite}"_test.log | awk '{print $1}' >> ./"${ut_suite}"_failed.log
     grep "PASSED" "${ut_suite}"_test.log | awk '{print $1}' > ./"${ut_suite}"_passed.log
-    num_failed=$(wc -l < "./${ut_suite}_failed.log")
+    compare_and_filter_logs "${ut_suite}"_failed.log Known_issue.log
+    num_failed=$(wc -l < "./${ut_suite}_failed_filtered.log")
     num_passed=$(wc -l < "./${ut_suite}_passed.log")
     echo -e "========================================================================="
     echo -e "Show Failed cases in ${ut_suite}"
@@ -18,10 +63,14 @@ if [[ "${ut_suite}" == 'op_regression' || "${ut_suite}" == 'op_regression_dev1'
     fi
 fi
 if [[ "${ut_suite}" == 'op_ut' ]]; then
-    grep -E "^FAILED|have failures" op_ut_with_skip_test.log | awk '{print $2}' > ./"${ut_suite}"_with_skip_test_failed.log
-    grep -E "^FAILED|have failures" op_ut_with_only_test.log | awk '{print $2}' > ./"${ut_suite}"_with_only_test_failed.log
-    num_failed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_failed.log")
-    num_failed_with_only=$(wc -l < "./${ut_suite}_with_only_test_failed.log")
+    grep -E "^FAILED" op_ut_with_skip_test.log | awk '{print $2}' > ./"${ut_suite}"_with_skip_test_failed.log
+    grep -E "have failures" op_ut_with_skip_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_skip_test_failed.log
+    grep -E "^FAILED" op_ut_with_only_test.log | awk '{print $2}' > ./"${ut_suite}"_with_only_test_failed.log
+    grep -E "have failures" op_ut_with_only_test.log | awk '{print $1}' >> ./"${ut_suite}"_with_only_test_failed.log
+    compare_and_filter_logs "${ut_suite}"_with_skip_test_failed.log Known_issue.log
+    num_failed_with_skip=$(wc -l < "./${ut_suite}_with_skip_test_failed_filtered.log")
+    compare_and_filter_logs "${ut_suite}"_with_only_test_failed.log Known_issue.log
+    num_failed_with_only=$(wc -l < "./${ut_suite}_with_only_test_failed_filtered.log")
     echo -e "========================================================================="
     echo -e "Show Failed cases in ${ut_suite} with skip"
     echo -e "========================================================================="
@@ -73,8 +122,10 @@ if [[ "${ut_suite}" == 'torch_xpu' ]]; then
     fi
 fi
 if [[ "${ut_suite}" == 'xpu_distributed' ]]; then
-    grep -E "^FAILED|have failures" xpu_distributed_test.log | awk '{print $2}' > ./"${ut_suite}"_xpu_distributed_test_failed.log
-    num_failed_xpu_distributed=$(wc -l < "./${ut_suite}_xpu_distributed_test_failed.log")
+    grep -E "^FAILED" xpu_distributed_test.log | awk '{print $2}' > ./"${ut_suite}"_xpu_distributed_test_failed.log
+    grep -E "have failures" xpu_distributed_test.log | awk '{print $1}' >> ./"${ut_suite}"_xpu_distributed_test_failed.log
+    compare_and_filter_logs "${ut_suite}"_xpu_distributed_test_failed.log Known_issue.log
+    num_failed_xpu_distributed=$(wc -l < "./${ut_suite}_xpu_distributed_test_failed_filtered.log")
     echo -e "========================================================================="
     echo -e "Show Failed cases in ${ut_suite} xpu distributed"
     echo -e "========================================================================="
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index f0f8ea42f..537ce29ab 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -49,6 +49,8 @@ jobs:
     env:
       NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
       DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+      commit_issue: 1624
+      GH_TOKEN: ${{ github.token }}
     steps:
       - name: Checkout torch-xpu-ops
         uses: actions/checkout@v4
@@ -175,6 +177,18 @@ jobs:
          cd ../pytorch/third_party/torch-xpu-ops/test/xpu
          timeout 10000 python run_test_with_skip.py 2>${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test_error.log | tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test.log
          cp *.xml ${{ github.workspace }}/ut_log
+         find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c '
+             dir_path=$(dirname "$1");
+             case "$dir_path" in
+                 *"op_ut_with_skip_quantization/core"*)
+                     dir_name="op_ut_with_skip_quantization_core";;
+                 *)
+                     dir_name=$(basename "$dir_path");;
+             esac;
+             mv "$1" "$dir_path/${dir_name}_$(basename "$1")"
+         ' _ {} \;
+         cp op_ut_with_skip_nn/*.xml ${{ github.workspace }}/ut_log
+         cp op_ut_with_skip_quantization/core/*.xml ${{ github.workspace }}/ut_log
          # Cases run with a on-demand white list, since some suites are too
          # slow to go through all operators on CPU. So add cases on-demand
          # when XPU implementatoin is done.
@@ -245,9 +259,12 @@ jobs:
          source activate xpu_op_${ZE_AFFINITY_MASK}
          pip install junitparser
          python .github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
+         cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv
      - name: UT Test Results Check
        shell: bash
        run: |
+         test_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+         repo="${{ github.repository }}"
         function contains() {
             contains_status="echo 'Start $2 ...'"
             {
@@ -264,6 +281,7 @@ jobs:
           contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu" $ut_suite
           $contains_status
           cd ${{ github.workspace }}/ut_log/${ut_suite}
+          gh --repo $repo issue view $commit_issue --json body -q .body | sed '/^$/d' > Known_issue.log
           cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
           bash ut_result_check.sh ${ut_suite}
         done
@@ -273,6 +291,12 @@ jobs:
        with:
          name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
          path: ${{ github.workspace }}/ut_log
+     - name: Upload OP UT failure list
+       if: always()
+       uses: actions/upload-artifact@v4
+       with:
+         name: UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
+         path: ${{ github.workspace }}/ut_log/ut_failure_list.csv
 
   distributed_ut_test:
     runs-on: pvc_e2e
@@ -281,6 +305,8 @@
     env:
       NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
       DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+      commit_issue: 1624
+      GH_TOKEN: ${{ github.token }}
     steps:
       - name: Checkout torch-xpu-ops
         uses: actions/checkout@v4
@@ -383,6 +409,8 @@
      - name: UT Test Results Check
        shell: bash
        run: |
+         test_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+         repo="${{ github.repository }}"
         function contains() {
             contains_status="echo 'Start $2 ...'"
             {
@@ -395,6 +423,7 @@
         set -xe
         echo "UT_NAME=$(echo ${{ inputs.ut }} |sed 's/,/-/g')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
         cd ${{ github.workspace }}/ut_log/xpu_distributed
+        gh --repo $repo issue view $commit_issue --json body -q .body | sed '/^$/d' > Known_issue.log
         cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
         bash ut_result_check.sh 'xpu_distributed'
      - name: Upload Inductor XPU UT Log
diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 0de152f27..9fd126a9f 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -6,7 +6,7 @@
         "float8",
         # workarounds for the following tests
         # https://github.com/intel/torch-xpu-ops/issues/1214
-        "test_python_ref__refs_exp_xpu_complex128",
+        # "test_python_ref__refs_exp_xpu_complex128",
         "test_python_ref__refs_sigmoid_xpu_complex128",
         "test_python_ref_executor__refs_log2_executor_aten_xpu_complex128",
         "test_python_ref_executor__refs_exp_executor_aten_xpu_complex128",
@@ -35,7 +35,7 @@
         # AssertionError: The supported dtypes for sparse.sampled_addmm on device type xpu are incorrect! - OPs not supported
         "test_dtypes_sparse_sampled_addmm_xpu",
         # OPs not supported
-        "test_errors_dot_xpu",
+        # "test_errors_dot_xpu",
         "test_errors_vdot_xpu",
         # Linalg OPs not supported
         "test_noncontiguous_samples_linalg_det_xpu_float32",
@@ -54,7 +54,7 @@
         # RuntimeError: Long is not supported in oneDNN!
         # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
         # RuntimeError: Double and complex datatype matmul is not supported in oneDNN
-        "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64",
+        # "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64",
         "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64",
         "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_complex64",
         "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_float32",
@@ -666,7 +666,7 @@
         # Expected 9.183549615799121e-41 but got 0.0.
         # Absolute difference: 9.183549615799121e-41
         # Relative difference: 1.0
-        "test_nextafter_bfloat16_xpu_bfloat16",
+        # "test_nextafter_bfloat16_xpu_bfloat16",
     ),
     "test_scatter_gather_ops_xpu.py": (
         # AssertionError: Tensor-likes are not equal!
@@ -681,7 +681,7 @@
     ),  # Hard code CUDA, UT has already been rewritten to test/regressions/test_sort.py.
     "nn/test_embedding_xpu.py": (
         # NotImplementedError: Could not run 'aten::_indices' with arguments from the 'SparseXPU' backend.
-        "test_embedding_bag_device_xpu_int32_int32_float16",
+        # "test_embedding_bag_device_xpu_int32_int32_float16",
         "test_embedding_bag_device_xpu_int32_int32_float32",
         "test_embedding_bag_device_xpu_int32_int32_float64",
         "test_embedding_bag_device_xpu_int32_int64_float16",