From 237b1bff1140c3e4b8b2fb2b3982cc1ef0fd56c2 Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 00:04:38 -0700 Subject: [PATCH 1/6] upload a ut_failure_list.csv as artifacts --- .github/scripts/check-ut.py | 70 +++++++++++++++++++++++---------- .github/workflows/_linux_ut.yml | 9 +++++ 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index 5758c4e6d1..19d9881b0d 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -20,9 +20,9 @@ "IndexError", "ImportError", "AssertionError", - "Exception", + #"Exception", "OSError", - "Failed", + #"Failed", "TimeoutError", "asyncio.TimeoutError", "FileNotFoundError", @@ -65,29 +65,52 @@ def get_message(case): error_messages = [] capture_next_lines = False indent_level = 0 - + collect_trace = False + collect_error = False + import pdb + pdb.set_trace() for line in full_text.splitlines(): stripped_line = line.strip() if not stripped_line: continue - for error_type in error_types: - if stripped_line.startswith(error_type + ": "): - error_msg = stripped_line[len(error_type)+2:] - error_messages.append(f"{error_type}: {error_msg}") - capture_next_lines = True - indent_level = 0 - break - elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line: - error_msg = stripped_line.split(f'{error_type}:')[-1].strip() - error_messages.append(f"{error_type}: {error_msg}") - capture_next_lines = True - indent_level = 0 - break + # collect the first trace + if collect_trace == False and "Traceback (most recent call last):" in stripped_line: + collect_trace = True + elif collect_trace == True and "Error: " in stripped_line: + error_messages.append(f"{stripped_line}") + collect_trace = False + break + elif stripped_line == "": + collect_trace = False + break + + if collect_trace: + error_messages.append(f"{stripped_line}") + + #for error_type in error_types: + # if stripped_line.startswith(error_type + ": "): + # error_msg = stripped_line[len(error_type)+2:] + # error_messages.append(f"{error_type}: {error_msg}") + # capture_next_lines = True + # indent_level = 0 + # collect_trace = False + # collect_error = True + # break + # elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line: + # error_msg = stripped_line.split(f'{error_type}:')[-1].strip() + # error_messages.append(f"{error_type}: {error_msg}") + # capture_next_lines = True + # indent_level = 0 + # collect_trace = False + # collect_error = True + # break + #if collect_error: + # break return " ; ".join(error_messages) if error_messages else f"{case.result[0].message.splitlines()[0]}" -def print_md_row(row, print_header=False): +def print_md_row(row, print_header=False, failure_list=None): if print_header: header = " | ".join([f"{key}" for key in row.keys()]) print(f"| {header} |") @@ -96,7 +119,12 @@ def print_md_row(row, print_header=False): row_values = " | ".join([f"{value}" for value in row.values()]) print(f"| {row_values} |") -def print_failures(): + if failure_list is not None: + failure_list.write(f"| {row_values} |\n") + + + +def print_failures(failure_list=None): if not failures: return @@ -109,7 +137,7 @@ def print_failures(): 'Status': get_result(case), 'Message': get_message(case), 'Source': case['source'] if isinstance(case, dict) else 'XML' - }, print_header) + }, print_header, failure_list=failure_list) print_header = False def parse_log_file(log_file): @@ -251,7 +279,9 @@ def main(): else: print(f"Skipping unknown file type: {input_file}", file=sys.stderr) - print_failures() + with open("ut_failure_list.csv", "w") as failure_list: + print_failures(failure_list=failure_list) + print_summary() diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 7f29d89d3d..ea193a53b2 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -295,6 +295,9 @@ jobs: source activate xpu_op_${ZE_AFFINITY_MASK} pip install junitparser python .github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true + if [ -e "ut_failure_list.csv" ];then + cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv >> $GITHUB_STEP_SUMMARY || true + fi - name: UT Test Results Check shell: bash run: | @@ -325,6 +328,12 @@ jobs: with: name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} path: ${{ github.workspace }}/ut_log + - name: Upload XPU UT Failure list + if: always() + uses: actions/upload-artifact@v4 + with: + name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }} + path: ${{ github.workspace }}/ut_log/ut_failure_list.csv distributed_ut_test: runs-on: pvc_e2e From f97e667a55ac71c8ebecbc4ee035d772bb1d9e88 Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 00:41:17 -0700 Subject: [PATCH 2/6] collect trace in error message --- .github/scripts/check-ut.py | 60 ++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index 19d9881b0d..3290fd5063 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -52,7 +52,7 @@ def get_result(case): result = "failed" return result -def get_message(case): +def get_message(case, failure_list=None): if isinstance(case, dict): return case.get('error', '') @@ -65,48 +65,38 @@ def get_message(case): error_messages = [] capture_next_lines = False indent_level = 0 - collect_trace = False - collect_error = False - import pdb - pdb.set_trace() + + collect_trace_done = False + collect_trace = False + for line in full_text.splitlines(): stripped_line = line.strip() if not stripped_line: continue - # collect the first trace - if collect_trace == False and "Traceback (most recent call last):" in stripped_line: + # Only collet the first trace + if collect_trace_done == False and "Traceback (most recent call last):" in stripped_line: collect_trace = True - elif collect_trace == True and "Error: " in stripped_line: - error_messages.append(f"{stripped_line}") - collect_trace = False - break - elif stripped_line == "": - collect_trace = False - break - + if collect_trace: + if "Error: " in stripped_line: + collect_trace = False + collect_trace_done = True error_messages.append(f"{stripped_line}") - - #for error_type in error_types: - # if stripped_line.startswith(error_type + ": "): - # error_msg = stripped_line[len(error_type)+2:] - # error_messages.append(f"{error_type}: {error_msg}") - # capture_next_lines = True - # indent_level = 0 - # collect_trace = False - # collect_error = True - # break - # elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line: - # error_msg = stripped_line.split(f'{error_type}:')[-1].strip() - # error_messages.append(f"{error_type}: {error_msg}") - # capture_next_lines = True - # indent_level = 0 - # collect_trace = False - # collect_error = True - # break - #if collect_error: - # break + else: + for error_type in error_types: + if stripped_line.startswith(error_type + ": "): + error_msg = stripped_line[len(error_type)+2:] + error_messages.append(f"{error_type}: {error_msg}") + capture_next_lines = True + indent_level = 0 + break + elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line: + error_msg = stripped_line.split(f'{error_type}:')[-1].strip() + error_messages.append(f"{error_type}: {error_msg}") + capture_next_lines = True + indent_level = 0 + break return " ; ".join(error_messages) if error_messages else f"{case.result[0].message.splitlines()[0]}" From ba32f87f493e0a8820aa635b3b6a6e0476be575c Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 00:44:49 -0700 Subject: [PATCH 3/6] fix lint issue --- .github/scripts/check-ut.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py index 3290fd5063..c126c92f99 100644 --- a/.github/scripts/check-ut.py +++ b/.github/scripts/check-ut.py @@ -67,7 +67,7 @@ def get_message(case, failure_list=None): indent_level = 0 collect_trace_done = False - collect_trace = False + collect_trace = False for line in full_text.splitlines(): stripped_line = line.strip() @@ -75,13 +75,13 @@ def get_message(case, failure_list=None): continue # Only collet the first trace - if collect_trace_done == False and "Traceback (most recent call last):" in stripped_line: + if not collect_trace_done and "Traceback (most recent call last):" in stripped_line: collect_trace = True - + if collect_trace: if "Error: " in stripped_line: collect_trace = False - collect_trace_done = True + collect_trace_done = True error_messages.append(f"{stripped_line}") else: for error_type in error_types: From 207636da2cfc6a7f34d754f888ac0df4047bb34b Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 00:57:32 -0700 Subject: [PATCH 4/6] remove a case from skip list for testing purpose --- test/xpu/skip_list_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index 1d29f99722..d5258f4a6e 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -3064,7 +3064,7 @@ "test_nonzero_xpu", # https://github.com/intel/torch-xpu-ops/issues/1569 # RuntimeError: output 0: meta disagrees with real impl - "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16", + # "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16", "test_dispatch_meta_outplace_norm_fro_xpu_complex128", "test_dispatch_meta_outplace_norm_fro_xpu_complex64", "test_dispatch_meta_outplace_norm_fro_xpu_float", From 2f5b4ea5f28b3851d3cd7b0a8ac665f1132a2d9f Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 02:45:38 -0700 Subject: [PATCH 5/6] fix 2025.1 build issue from pr#1669 --- .github/scripts/build.sh | 24 +++++++++++++++++++++++- .github/scripts/env.sh | 16 +--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index d9df68705c..4e5ae4aa9a 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -49,9 +49,31 @@ python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py git submodule sync && git submodule update --init --recursive python -m pip install -r requirements.txt python -m pip install mkl-static mkl-include -# python -m pip install -U cmake==3.31.6 +export USE_STATIC_MKL=1 export USE_ONEMKL=1 export USE_XCCL=1 +export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \ + intel-cmplr-lib-rt==2025.1.1 | \ + intel-cmplr-lib-ur==2025.1.1 | \ + intel-cmplr-lic-rt==2025.1.1 | \ + intel-sycl-rt==2025.1.1 | \ + oneccl-devel==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + oneccl==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \ + onemkl-sycl-blas==2025.1.0 | \ + onemkl-sycl-dft==2025.1.0 | \ + onemkl-sycl-lapack==2025.1.0 | \ + onemkl-sycl-rng==2025.1.0 | \ + onemkl-sycl-sparse==2025.1.0 | \ + dpcpp-cpp-rt==2025.1.1 | \ + intel-opencl-rt==2025.1.1 | \ + mkl==2025.1.0 | \ + intel-openmp==2025.1.1 | \ + tbb==2022.1.0 | \ + tcmlib==1.3.0 | \ + umf==0.10.0 | \ + intel-pti==0.12.0 +" # Build sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index 3fd5b03538..1fb4d8c755 100644 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -1,21 +1,7 @@ #!/bin/bash source /opt/intel/oneapi/compiler/latest/env/vars.sh -source /opt/intel/oneapi/umf/latest/env/vars.sh source /opt/intel/oneapi/pti/latest/env/vars.sh +source /opt/intel/oneapi/umf/latest/env/vars.sh source /opt/intel/oneapi/ccl/latest/env/vars.sh source /opt/intel/oneapi/mpi/latest/env/vars.sh -source /opt/intel/oneapi/mkl/latest/env/vars.sh -export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="\ - intel-cmplr-lib-rt==2025.0.5 |\ - intel-cmplr-lib-ur==2025.0.5 |\ - intel-cmplr-lic-rt==2025.0.5 |\ - intel-sycl-rt==2025.0.5 |\ - impi-devel==2021.14.2 |\ - oneccl-devel==2021.14.1 |\ - mkl-devel==2025.0.1 |\ - onemkl-sycl-blas==2025.0.1 |\ - onemkl-sycl-dft==2025.0.1 |\ - onemkl-sycl-lapack==2025.0.1 |\ - tcmlib==1.2.0 | umf==0.9.1 | intel-pti==0.10.2 \ -" From e4ae0b5eabc1990336764416fab1ec11eeded76a Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Thu, 15 May 2025 07:36:02 -0700 Subject: [PATCH 6/6] run some skipped cases for testing --- test/xpu/skip_list_common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index d5258f4a6e..eb0b9aff7b 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -6,7 +6,7 @@ "float8", # workarounds for the following tests # https://github.com/intel/torch-xpu-ops/issues/1214 - "test_python_ref__refs_exp_xpu_complex128", + # "test_python_ref__refs_exp_xpu_complex128", "test_python_ref__refs_sigmoid_xpu_complex128", "test_python_ref_executor__refs_log2_executor_aten_xpu_complex128", "test_python_ref_executor__refs_exp_executor_aten_xpu_complex128", @@ -35,7 +35,7 @@ # AssertionError: The supported dtypes for sparse.sampled_addmm on device type xpu are incorrect! - OPs not supported "test_dtypes_sparse_sampled_addmm_xpu", # OPs not supported - "test_errors_dot_xpu", + # "test_errors_dot_xpu", "test_errors_vdot_xpu", # Linalg OPs not supported "test_noncontiguous_samples_linalg_det_xpu_float32", @@ -54,7 +54,7 @@ # RuntimeError: Long is not supported in oneDNN! # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive # RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64", + # "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64", "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64", "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_complex64", "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_float32", @@ -775,7 +775,7 @@ "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_2_xpu", "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_0_xpu", # https://github.com/intel/torch-xpu-ops/issues/1432 - "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu", + # "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu", "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_3_key_padding_mask_dim_2_bool_xpu", "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_False_d_model_12_xpu", "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_True_d_model_12_xpu", @@ -3064,7 +3064,7 @@ "test_nonzero_xpu", # https://github.com/intel/torch-xpu-ops/issues/1569 # RuntimeError: output 0: meta disagrees with real impl - # "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16", + "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16", "test_dispatch_meta_outplace_norm_fro_xpu_complex128", "test_dispatch_meta_outplace_norm_fro_xpu_complex64", "test_dispatch_meta_outplace_norm_fro_xpu_float",