From 237b1bff1140c3e4b8b2fb2b3982cc1ef0fd56c2 Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 00:04:38 -0700
Subject: [PATCH 1/6] upload ut_failure_list.csv as an artifact

---
 .github/scripts/check-ut.py     | 70 +++++++++++++++++++++++----------
 .github/workflows/_linux_ut.yml |  9 +++++
 2 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 5758c4e6d1..19d9881b0d 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -20,9 +20,9 @@
     "IndexError",
     "ImportError",
     "AssertionError",
-    "Exception",
+    #"Exception",
     "OSError",
-    "Failed",
+    #"Failed",
     "TimeoutError",
     "asyncio.TimeoutError",
     "FileNotFoundError",
@@ -65,29 +65,52 @@ def get_message(case):
     error_messages = []
     capture_next_lines = False
     indent_level = 0
-
+    collect_trace = False
+    collect_error = False
+    import pdb
+    pdb.set_trace()
     for line in full_text.splitlines():
         stripped_line = line.strip()
         if not stripped_line:
             continue
 
-        for error_type in error_types:
-            if stripped_line.startswith(error_type + ": "):
-                error_msg = stripped_line[len(error_type)+2:]
-                error_messages.append(f"{error_type}: {error_msg}")
-                capture_next_lines = True
-                indent_level = 0
-                break
-            elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line:
-                error_msg = stripped_line.split(f'{error_type}:')[-1].strip()
-                error_messages.append(f"{error_type}: {error_msg}")
-                capture_next_lines = True
-                indent_level = 0
-                break
+        # collect the first trace 
+        if collect_trace == False and "Traceback (most recent call last):" in stripped_line:
+            collect_trace = True
+        elif collect_trace == True and "Error: " in stripped_line:
+            error_messages.append(f"{stripped_line}")
+            collect_trace = False
+            break
+        elif stripped_line == "":
+            collect_trace = False
+            break
+
+        if collect_trace:
+            error_messages.append(f"{stripped_line}")
+
+        #for error_type in error_types:
+        #    if stripped_line.startswith(error_type + ": "):
+        #        error_msg = stripped_line[len(error_type)+2:]
+        #        error_messages.append(f"{error_type}: {error_msg}")
+        #        capture_next_lines = True
+        #        indent_level = 0
+        #        collect_trace = False
+        #        collect_error = True
+        #        break
+        #    elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line:
+        #        error_msg = stripped_line.split(f'{error_type}:')[-1].strip()
+        #        error_messages.append(f"{error_type}: {error_msg}")
+        #        capture_next_lines = True
+        #        indent_level = 0
+        #        collect_trace = False
+        #        collect_error = True
+        #        break
+        #if collect_error:
+        #    break
 
     return " ; ".join(error_messages) if error_messages else f"{case.result[0].message.splitlines()[0]}"
 
-def print_md_row(row, print_header=False):
+def print_md_row(row, print_header=False, failure_list=None):
     if print_header:
         header = " | ".join([f"{key}" for key in row.keys()])
         print(f"| {header} |")
@@ -96,7 +119,12 @@ def print_md_row(row, print_header=False):
     row_values = " | ".join([f"{value}" for value in row.values()])
     print(f"| {row_values} |")
 
-def print_failures():
+    if failure_list is not None:
+        failure_list.write(f"| {row_values} |\n")
+
+
+
+def print_failures(failure_list=None):
     if not failures:
         return
 
@@ -109,7 +137,7 @@ def print_failures():
             'Status': get_result(case),
             'Message': get_message(case),
             'Source': case['source'] if isinstance(case, dict) else 'XML'
-        }, print_header)
+        }, print_header, failure_list=failure_list)
         print_header = False
 
 def parse_log_file(log_file):
@@ -251,7 +279,9 @@ def main():
         else:
             print(f"Skipping unknown file type: {input_file}", file=sys.stderr)
 
-    print_failures()
+    with open("ut_failure_list.csv", "w") as failure_list:
+        print_failures(failure_list=failure_list)
+
     print_summary()
 
 
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 7f29d89d3d..ea193a53b2 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -295,6 +295,9 @@ jobs:
           source activate xpu_op_${ZE_AFFINITY_MASK}
           pip install junitparser
           python .github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
+          if [ -e "ut_failure_list.csv" ];then
+              cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv >> $GITHUB_STEP_SUMMARY || true
+          fi
       - name: UT Test Results Check
         shell: bash
         run: |
@@ -325,6 +328,12 @@ jobs:
         with:
           name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
           path: ${{ github.workspace }}/ut_log
+      - name: Upload XPU UT Failure list
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
+          path: ${{ github.workspace }}/ut_log/ut_failure_list.csv
   
   distributed_ut_test:
     runs-on: pvc_e2e

From f97e667a55ac71c8ebecbc4ee035d772bb1d9e88 Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 00:41:17 -0700
Subject: [PATCH 2/6] collect trace in error message

---
 .github/scripts/check-ut.py | 60 ++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 19d9881b0d..3290fd5063 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -52,7 +52,7 @@ def get_result(case):
             result = "failed"
     return result
 
-def get_message(case):
+def get_message(case, failure_list=None):
     if isinstance(case, dict):
         return case.get('error', '')
 
@@ -65,48 +65,38 @@ def get_message(case):
     error_messages = []
     capture_next_lines = False
     indent_level = 0
-    collect_trace = False
-    collect_error = False
-    import pdb
-    pdb.set_trace()
+
+    collect_trace_done = False
+    collect_trace = False 
+
     for line in full_text.splitlines():
         stripped_line = line.strip()
         if not stripped_line:
             continue
 
-        # collect the first trace 
-        if collect_trace == False and "Traceback (most recent call last):" in stripped_line:
+        # Only collet the first trace
+        if collect_trace_done == False and "Traceback (most recent call last):" in stripped_line:
             collect_trace = True
-        elif collect_trace == True and "Error: " in stripped_line:
-            error_messages.append(f"{stripped_line}")
-            collect_trace = False
-            break
-        elif stripped_line == "":
-            collect_trace = False
-            break
-
+ 
         if collect_trace:
+            if "Error: " in stripped_line:
+                collect_trace = False
+                collect_trace_done = True 
             error_messages.append(f"{stripped_line}")
-
-        #for error_type in error_types:
-        #    if stripped_line.startswith(error_type + ": "):
-        #        error_msg = stripped_line[len(error_type)+2:]
-        #        error_messages.append(f"{error_type}: {error_msg}")
-        #        capture_next_lines = True
-        #        indent_level = 0
-        #        collect_trace = False
-        #        collect_error = True
-        #        break
-        #    elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line:
-        #        error_msg = stripped_line.split(f'{error_type}:')[-1].strip()
-        #        error_messages.append(f"{error_type}: {error_msg}")
-        #        capture_next_lines = True
-        #        indent_level = 0
-        #        collect_trace = False
-        #        collect_error = True
-        #        break
-        #if collect_error:
-        #    break
+        else:
+            for error_type in error_types:
+                if stripped_line.startswith(error_type + ": "):
+                    error_msg = stripped_line[len(error_type)+2:]
+                    error_messages.append(f"{error_type}: {error_msg}")
+                    capture_next_lines = True
+                    indent_level = 0
+                    break
+                elif f"{error_type}:" in stripped_line and "Traceback" not in stripped_line:
+                    error_msg = stripped_line.split(f'{error_type}:')[-1].strip()
+                    error_messages.append(f"{error_type}: {error_msg}")
+                    capture_next_lines = True
+                    indent_level = 0
+                    break
 
     return " ; ".join(error_messages) if error_messages else f"{case.result[0].message.splitlines()[0]}"
 

From ba32f87f493e0a8820aa635b3b6a6e0476be575c Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 00:44:49 -0700
Subject: [PATCH 3/6] fix lint issue

---
 .github/scripts/check-ut.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 3290fd5063..c126c92f99 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -67,7 +67,7 @@ def get_message(case, failure_list=None):
     indent_level = 0
 
     collect_trace_done = False
-    collect_trace = False 
+    collect_trace = False
 
     for line in full_text.splitlines():
         stripped_line = line.strip()
@@ -75,13 +75,13 @@ def get_message(case, failure_list=None):
             continue
 
         # Only collet the first trace
-        if collect_trace_done == False and "Traceback (most recent call last):" in stripped_line:
+        if not collect_trace_done and "Traceback (most recent call last):" in stripped_line:
             collect_trace = True
- 
+
         if collect_trace:
             if "Error: " in stripped_line:
                 collect_trace = False
-                collect_trace_done = True 
+                collect_trace_done = True
             error_messages.append(f"{stripped_line}")
         else:
             for error_type in error_types:

From 207636da2cfc6a7f34d754f888ac0df4047bb34b Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 00:57:32 -0700
Subject: [PATCH 4/6] remove a case from the skip list for testing purposes

---
 test/xpu/skip_list_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 1d29f99722..d5258f4a6e 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -3064,7 +3064,7 @@
         "test_nonzero_xpu",
         # https://github.com/intel/torch-xpu-ops/issues/1569
         # RuntimeError: output 0: meta disagrees with real impl
-        "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16",
+        # "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16",
         "test_dispatch_meta_outplace_norm_fro_xpu_complex128",
         "test_dispatch_meta_outplace_norm_fro_xpu_complex64",
         "test_dispatch_meta_outplace_norm_fro_xpu_float",

From 2f5b4ea5f28b3851d3cd7b0a8ac665f1132a2d9f Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 02:45:38 -0700
Subject: [PATCH 5/6] fix 2025.1 build issue from pr#1669

---
 .github/scripts/build.sh | 24 +++++++++++++++++++++++-
 .github/scripts/env.sh   | 16 +---------------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
index d9df68705c..4e5ae4aa9a 100755
--- a/.github/scripts/build.sh
+++ b/.github/scripts/build.sh
@@ -49,9 +49,31 @@ python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
 git submodule sync && git submodule update --init --recursive
 python -m pip install -r requirements.txt
 python -m pip install mkl-static mkl-include
-# python -m pip install -U cmake==3.31.6
+export USE_STATIC_MKL=1
 export USE_ONEMKL=1
 export USE_XCCL=1
+export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \
+    intel-cmplr-lib-rt==2025.1.1 | \
+    intel-cmplr-lib-ur==2025.1.1 | \
+    intel-cmplr-lic-rt==2025.1.1 | \
+    intel-sycl-rt==2025.1.1 | \
+    oneccl-devel==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    oneccl==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    onemkl-sycl-blas==2025.1.0 | \
+    onemkl-sycl-dft==2025.1.0 | \
+    onemkl-sycl-lapack==2025.1.0 | \
+    onemkl-sycl-rng==2025.1.0 | \
+    onemkl-sycl-sparse==2025.1.0 | \
+    dpcpp-cpp-rt==2025.1.1 | \
+    intel-opencl-rt==2025.1.1 | \
+    mkl==2025.1.0 | \
+    intel-openmp==2025.1.1 | \
+    tbb==2022.1.0 | \
+    tcmlib==1.3.0 | \
+    umf==0.10.0 | \
+    intel-pti==0.12.0
+"
 
 # Build
 sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh
index 3fd5b03538..1fb4d8c755 100644
--- a/.github/scripts/env.sh
+++ b/.github/scripts/env.sh
@@ -1,21 +1,7 @@
 #!/bin/bash
 
 source /opt/intel/oneapi/compiler/latest/env/vars.sh
-source /opt/intel/oneapi/umf/latest/env/vars.sh
 source /opt/intel/oneapi/pti/latest/env/vars.sh
+source /opt/intel/oneapi/umf/latest/env/vars.sh
 source /opt/intel/oneapi/ccl/latest/env/vars.sh
 source /opt/intel/oneapi/mpi/latest/env/vars.sh
-source /opt/intel/oneapi/mkl/latest/env/vars.sh
-export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="\
-    intel-cmplr-lib-rt==2025.0.5 |\
-    intel-cmplr-lib-ur==2025.0.5 |\
-    intel-cmplr-lic-rt==2025.0.5 |\
-    intel-sycl-rt==2025.0.5 |\
-    impi-devel==2021.14.2 |\
-    oneccl-devel==2021.14.1 |\
-    mkl-devel==2025.0.1 |\
-    onemkl-sycl-blas==2025.0.1 |\
-    onemkl-sycl-dft==2025.0.1 |\
-    onemkl-sycl-lapack==2025.0.1 |\
-    tcmlib==1.2.0 | umf==0.9.1 | intel-pti==0.10.2 \
-"

From e4ae0b5eabc1990336764416fab1ec11eeded76a Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Thu, 15 May 2025 07:36:02 -0700
Subject: [PATCH 6/6] run some skipped cases for testing

---
 test/xpu/skip_list_common.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index d5258f4a6e..eb0b9aff7b 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -6,7 +6,7 @@
         "float8",
         # workarounds for the following tests
         # https://github.com/intel/torch-xpu-ops/issues/1214
-        "test_python_ref__refs_exp_xpu_complex128",
+        # "test_python_ref__refs_exp_xpu_complex128",
         "test_python_ref__refs_sigmoid_xpu_complex128",
         "test_python_ref_executor__refs_log2_executor_aten_xpu_complex128",
         "test_python_ref_executor__refs_exp_executor_aten_xpu_complex128",
@@ -35,7 +35,7 @@
         # AssertionError: The supported dtypes for sparse.sampled_addmm on device type xpu are incorrect! - OPs not supported
         "test_dtypes_sparse_sampled_addmm_xpu",
         # OPs not supported
-        "test_errors_dot_xpu",
+        # "test_errors_dot_xpu",
         "test_errors_vdot_xpu",
         # Linalg OPs not supported
         "test_noncontiguous_samples_linalg_det_xpu_float32",
@@ -54,7 +54,7 @@
         # RuntimeError: Long is not supported in oneDNN!
         # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
         # RuntimeError: Double and complex datatype matmul is not supported in oneDNN
-        "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64",
+        # "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64",
         "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64",
         "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_complex64",
         "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_float32",
@@ -775,7 +775,7 @@
         "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_2_xpu",
         "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_0_xpu",
         # https://github.com/intel/torch-xpu-ops/issues/1432
-        "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu",
+        # "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu",
         "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_3_key_padding_mask_dim_2_bool_xpu",
         "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_False_d_model_12_xpu",
         "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_True_d_model_12_xpu",
@@ -3064,7 +3064,7 @@
         "test_nonzero_xpu",
         # https://github.com/intel/torch-xpu-ops/issues/1569
         # RuntimeError: output 0: meta disagrees with real impl
-        # "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16",
+        "test_dispatch_meta_outplace_norm_fro_xpu_bfloat16",
         "test_dispatch_meta_outplace_norm_fro_xpu_complex128",
         "test_dispatch_meta_outplace_norm_fro_xpu_complex64",
         "test_dispatch_meta_outplace_norm_fro_xpu_float",