From e6ca99296021989c7e729b4749e1d830244c1ea1 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 27 Feb 2025 14:01:17 -0800
Subject: [PATCH 01/79] Move UR devops scripts to devops folder

---
 .github/workflows/ur-benchmarks-reusable.yml        |   6 +++---
 .github/workflows/ur-build-hw.yml                   |   2 +-
 .../scripts/benchmarks/README.md                    |   0
 .../scripts/benchmarks/benches/base.py              |   0
 .../scripts/benchmarks/benches/compute.py           |   0
 .../scripts/benchmarks/benches/llamacpp.py          |   0
 .../scripts/benchmarks/benches/oneapi.py            |   0
 .../scripts/benchmarks/benches/result.py            |   0
 .../scripts/benchmarks/benches/syclbench.py         |   0
 .../scripts/benchmarks/benches/test.py              |   0
 .../scripts/benchmarks/benches/umf.py               |   0
 .../scripts/benchmarks/benches/velocity.py          |   0
 .../benchmarks/benchmark_results.html.template      |   0
 .../scripts/benchmarks/history.py                   |   0
 .../scripts/benchmarks/main.py                      |   0
 .../scripts/benchmarks/options.py                   |   0
 .../scripts/benchmarks/output_html.py               |   0
 .../scripts/benchmarks/output_markdown.py           |   0
 .../scripts/benchmarks/requirements.txt             |   0
 .../scripts/benchmarks/utils/compute_runtime.py     |   0
 .../scripts/benchmarks/utils/utils.py               |   0
 .../scripts/benchmarks/workflow.png                 | Bin
 .../.github => devops}/scripts/get_system_info.sh   |   0
 23 files changed, 4 insertions(+), 4 deletions(-)
 rename {unified-runtime => devops}/scripts/benchmarks/README.md (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/base.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/compute.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/llamacpp.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/oneapi.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/result.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/syclbench.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/test.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/umf.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benches/velocity.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/benchmark_results.html.template (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/history.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/main.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/options.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/output_html.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/output_markdown.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/requirements.txt (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/utils/compute_runtime.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/utils/utils.py (100%)
 rename {unified-runtime => devops}/scripts/benchmarks/workflow.png (100%)
 rename {unified-runtime/.github => devops}/scripts/get_system_info.sh (100%)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index 3b5a0480421d4..6e8a4ea535d15 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -82,7 +82,7 @@ jobs:
 
     - name: Install pip packages
       run: |
-        pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks/requirements.txt
+        pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt
 
     - name: Configure SYCL
       run: >
@@ -139,7 +139,7 @@ jobs:
       working-directory: ${{ github.workspace }}
       id: benchmarks
       run: >
-        taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py
+        taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py
         ~/llvm_bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
         --ur ${{ github.workspace }}/ur_install
@@ -195,4 +195,4 @@ jobs:
 
     - name: Get information about platform
       if: ${{ always() }}
-      run: ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh
+      run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh
diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml
index 8ad0f45bb35bb..9cf4d262d580d 100644
--- a/.github/workflows/ur-build-hw.yml
+++ b/.github/workflows/ur-build-hw.yml
@@ -145,4 +145,4 @@ jobs:
 
     - name: Get information about platform
       if: ${{ always() }}
-      run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh
+      run: ${{github.workspace}}/devops/scripts/get_system_info.sh
diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
similarity index 100%
rename from unified-runtime/scripts/benchmarks/README.md
rename to devops/scripts/benchmarks/README.md
diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/base.py
rename to devops/scripts/benchmarks/benches/base.py
diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/compute.py
rename to devops/scripts/benchmarks/benches/compute.py
diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py
rename to devops/scripts/benchmarks/benches/llamacpp.py
diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/benches/oneapi.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/benches/oneapi.py
diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/benches/result.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/benches/result.py
diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/syclbench.py
rename to devops/scripts/benchmarks/benches/syclbench.py
diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/test.py
rename to devops/scripts/benchmarks/benches/test.py
diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/umf.py
rename to devops/scripts/benchmarks/benches/umf.py
diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benches/velocity.py
rename to devops/scripts/benchmarks/benches/velocity.py
diff --git a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template
similarity index 100%
rename from unified-runtime/scripts/benchmarks/benchmark_results.html.template
rename to devops/scripts/benchmarks/benchmark_results.html.template
diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/history.py
rename to devops/scripts/benchmarks/history.py
diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/main.py
rename to devops/scripts/benchmarks/main.py
diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/options.py
rename to devops/scripts/benchmarks/options.py
diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/output_html.py
rename to devops/scripts/benchmarks/output_html.py
diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/output_markdown.py
rename to devops/scripts/benchmarks/output_markdown.py
diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
similarity index 100%
rename from unified-runtime/scripts/benchmarks/requirements.txt
rename to devops/scripts/benchmarks/requirements.txt
diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py
rename to devops/scripts/benchmarks/utils/compute_runtime.py
diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/utils/utils.py
rename to devops/scripts/benchmarks/utils/utils.py
diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png
similarity index 100%
rename from unified-runtime/scripts/benchmarks/workflow.png
rename to devops/scripts/benchmarks/workflow.png
diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh
similarity index 100%
rename from unified-runtime/.github/scripts/get_system_info.sh
rename to devops/scripts/get_system_info.sh

From 3d42db259ac9e04b59a9fe4f660024ac9073736d Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 28 Feb 2025 08:38:44 -0800
Subject: [PATCH 02/79] Restrict number of cores used

---
 devops/actions/run-tests/benchmark/action.yml | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 7f69fdf832982..69631d044891c 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -46,6 +46,26 @@ runs:
           echo "# This workflow is not guaranteed to work with other backends."
           echo "#" ;;
       esac
+  - name: Compute CPU core range to run benchmarks on
+    run: |
+      # Taken from ur-benchmark-reusable.yml:
+
+      # Compute the core range for the first NUMA node; second node is used by
+      # UMF. Skip the first 4 cores as the kernel is likely to schedule more
+      # work on these.
+      CORES="$(lscpu | awk '
+        /NUMA node0 CPU|On-line CPU/ {line=$0}
+        END {
+          split(line, a, " ")
+          split(a[4], b, ",")
+          sub(/^0/, "4", b[1])
+          print b[1]
+        }')"
+      echo "CPU core range to use: $CORES"
+      echo "CORES=$CORES" >> $GITHUB_ENV
+
+      ZE_AFFINITY_MASK=0
+      echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
   - name: Run compute-benchmarks
     shell: bash
     run: |
@@ -69,7 +89,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
+      taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
   - name: Push compute-benchmarks results
     if: always()
     shell: bash

From 4f08dd6fbf51002f45b0c9a44fa0310e94de5001 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 4 Mar 2025 13:20:29 -0800
Subject: [PATCH 03/79] Restore ur-benchmark*.yml

---
 .github/workflows/ur-benchmarks-reusable.yml | 198 ++++++++++++++++++-
 .github/workflows/ur-benchmarks.yml          |  55 +++++-
 2 files changed, 240 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index 66ffcecd70314..6e8a4ea535d15 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -1,12 +1,198 @@
 name: Benchmarks Reusable
 
-# This workflow is a WIP: This workflow file acts as a placeholder.
+on:
+  workflow_call:
+    inputs:
+      str_name:
+        required: true
+        type: string
+      pr_no:
+        required: true
+        # even though this is a number, this is a workaround for issues with
+        # reusable workflow calls that result in "Unexpected value '0'" error.
+        type: string
+      bench_script_params:
+        required: false
+        type: string
+        default: ''
+      sycl_config_params:
+        required: false
+        type: string
+        default: ''
+      upload_report:
+        required: false
+        type: boolean
+        default: false
+      compute_runtime_commit:
+        required: false
+        type: string
+        default: ''
 
-on: [ workflow_call ]
+permissions:
+  contents: read
+  pull-requests: write
 
 jobs:
-  do-nothing:
-    runs-on: ubuntu-latest
+  bench-run:
+    name: Build SYCL, Run Benchmarks
+    strategy:
+      matrix:
+        adapter: [
+          {str_name: "${{ inputs.str_name }}",
+          sycl_config: "${{ inputs.sycl_config_params }}"
+          }
+        ]
+        build_type: [Release]
+        compiler: [{c: clang, cxx: clang++}]
+
+    runs-on: "PVC_PERF"
+
     steps:
-      - run: echo 'This workflow is a WIP.'
-  
+    - name: Add comment to PR
+      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+      if: ${{ always() && inputs.pr_no != 0 }}
+      with:
+        script: |
+          const pr_no = '${{ inputs.pr_no }}';
+          const adapter = '${{ matrix.adapter.str_name }}';
+          const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
+          const params = '${{ inputs.bench_script_params }}';
+          const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}`;
+
+          github.rest.issues.createComment({
+            issue_number: pr_no,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: body
+          })
+
+    - name: Checkout SYCL
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        path: sycl-repo
+
+    # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged.
+    - name: Fetch PR's merge commit
+      if: ${{ inputs.pr_no != 0 }}
+      working-directory: ${{github.workspace}}/sycl-repo
+      run: |
+        git fetch -- https://github.com/${{github.repository}} +refs/pull/${{ inputs.pr_no }}/*:refs/remotes/origin/pr/${{ inputs.pr_no }}/*
+        git checkout origin/pr/${{ inputs.pr_no }}/merge
+        git rev-parse origin/pr/${{ inputs.pr_no }}/merge
+
+    - name: Install pip packages
+      run: |
+        pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt
+
+    - name: Configure SYCL
+      run: >
+        python3 sycl-repo/buildbot/configure.py
+        -t ${{matrix.build_type}}
+        -o ${{github.workspace}}/sycl_build
+        --cmake-gen "Ninja"
+        --cmake-opt="-DLLVM_INSTALL_UTILS=ON"
+        --cmake-opt="-DSYCL_PI_TESTS=OFF"
+        --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
+        --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        ${{matrix.adapter.sycl_config}}
+
+    - name: Build SYCL
+      run: cmake --build ${{github.workspace}}/sycl_build -j $(nproc)
+
+    # We need a complete installed UR for compute-benchmarks.
+    - name: Configure UR
+      run: >
+        cmake -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
+        -S${{github.workspace}}/sycl-repo/unified-runtime
+        -B${{github.workspace}}/ur_build
+        -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/ur_install
+        -DUR_BUILD_TESTS=OFF
+        -DUR_BUILD_ADAPTER_L0=ON
+        -DUR_BUILD_ADAPTER_L0_V2=ON
+        -DUMF_DISABLE_HWLOC=ON
+
+    - name: Build UR
+      run: cmake --build ${{github.workspace}}/ur_build -j $(nproc)
+
+    - name: Install UR
+      run: cmake --install ${{github.workspace}}/ur_build
+
+    - name: Compute core range
+      run: |
+        # Compute the core range for the first NUMA node; second node is for UMF jobs.
+        # Skip the first 4 cores - the kernel is likely to schedule more work on these.
+        CORES="$(lscpu | awk '
+          /NUMA node0 CPU|On-line CPU/ {line=$0}
+          END {
+            split(line, a, " ")
+            split(a[4], b, ",")
+            sub(/^0/, "4", b[1])
+            print b[1]
+          }')"
+        echo "Selected core: $CORES"
+        echo "CORES=$CORES" >> $GITHUB_ENV
+
+        ZE_AFFINITY_MASK=0
+        echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+
+    - name: Run benchmarks
+      working-directory: ${{ github.workspace }}
+      id: benchmarks
+      run: >
+        taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py
+        ~/llvm_bench_workdir
+        --sycl ${{ github.workspace }}/sycl_build
+        --ur ${{ github.workspace }}/ur_install
+        --adapter ${{ matrix.adapter.str_name }}
+        --compare baseline
+        --compute-runtime ${{ inputs.compute_runtime_commit }}
+        --build-igc
+        ${{ inputs.upload_report && '--output-html' || '' }}
+        ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
+        ${{ inputs.bench_script_params }}
+
+    - name: Print benchmark results
+      run: |
+        cat ${{ github.workspace }}/benchmark_results.md || true
+
+    - name: Add comment to PR
+      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+      if: ${{ always() && inputs.pr_no != 0 }}
+      with:
+        script: |
+          let markdown = ""
+          try {
+            const fs = require('fs');
+            markdown = fs.readFileSync('benchmark_results.md', 'utf8');
+          } catch(err) {
+          }
+
+          const pr_no = '${{ inputs.pr_no }}';
+          const adapter = '${{ matrix.adapter.str_name }}';
+          const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
+          const test_status = '${{ steps.benchmarks.outcome }}';
+          const job_status = '${{ job.status }}';
+          const params = '${{ inputs.bench_script_params }}';
+          const body = `Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`;
+
+          github.rest.issues.createComment({
+            issue_number: pr_no,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: body
+          })
+
+    - name: Rename benchmark results file
+      if: ${{ always() && inputs.upload_report }}
+      run: mv benchmark_results.html benchmark_results_${{ inputs.pr_no }}.html
+
+    - name: Upload HTML report
+      if: ${{ always() && inputs.upload_report }}
+      uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+      with:
+        path: benchmark_results_${{ inputs.pr_no }}.html
+        key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }}
+
+    - name: Get information about platform
+      if: ${{ always() }}
+      run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh
diff --git a/.github/workflows/ur-benchmarks.yml b/.github/workflows/ur-benchmarks.yml
index 23fbb1ad903b4..cde4bfa828d71 100644
--- a/.github/workflows/ur-benchmarks.yml
+++ b/.github/workflows/ur-benchmarks.yml
@@ -1,12 +1,53 @@
 name: Benchmarks
 
-# This workflow is a WIP: this workflow file acts as a placeholder.
+on:
+  workflow_dispatch:
+    inputs:
+      str_name:
+        description: Adapter
+        type: choice
+        required: true
+        default: 'level_zero'
+        options:
+          - level_zero
+          - level_zero_v2
+      pr_no:
+        description: PR number (0 is sycl main branch)
+        type: number
+        required: true
+      bench_script_params:
+        description: Benchmark script arguments
+        type: string
+        required: false
+        default: ''
+      sycl_config_params:
+        description: Extra params for SYCL configuration
+        type: string
+        required: false
+        default: ''
+      compute_runtime_commit:
+        description: 'Compute Runtime commit'
+        type: string
+        required: false
+        default: ''
+      upload_report:
+        description: 'Upload HTML report'
+        type: boolean
+        required: false
+        default: false
 
-on: [ workflow_dispatch ]
+permissions:
+  contents: read
+  pull-requests: write
 
 jobs:
-  do-nothing:
-    runs-on: ubuntu-latest
-    steps:
-      - run: echo 'This workflow is a WIP.'
-
+  manual:
+    name: Compute Benchmarks
+    uses: ./.github/workflows/ur-benchmarks-reusable.yml
+    with:
+      str_name: ${{ inputs.str_name }}
+      pr_no: ${{ inputs.pr_no }}
+      bench_script_params: ${{ inputs.bench_script_params }}
+      sycl_config_params: ${{ inputs.sycl_config_params }}
+      compute_runtime_commit: ${{ inputs.compute_runtime_commit }}
+      upload_report: ${{ inputs.upload_report }}

From 497dcce9d87e8d610b21afd930669e8059eba54f Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Wed, 5 Mar 2025 16:32:30 +0000
Subject: [PATCH 04/79] [benchmarks] improve HTML and Markdown output

This patch improves numerous aspects of how the benchmarking
results are visualized:
 - rewrites the way HTML charts are generated, using a library (Chart.js)
 that's both easier to use and more visually pleasing.
 The new HTML page also now decouples data from the HTML itself,
 leading to faster load times and the ability to fetch data
 from remote sources.
 - The markdown output now contains a failures section that
 lists all benchmarks that failed for a given run. This will be
 helpful for developers during PR testing.
 - Benchmarks can now have a description that's displayed on the page.
 - And many more minor improvements.
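
For illustration only (this is not code from the patch), a minimal sketch of
the data/HTML decoupling idea: the Python side writes results into a small
standalone data file, and the static Chart.js page loads that file instead of
embedding the data inline. The names below (write_data_file, benchmarkRuns)
are hypothetical.

    import json

    def write_data_file(results: list[dict], path: str = "data.js") -> None:
        # Serialize results into a tiny JS file; the HTML page pulls it in via
        # a <script> tag (or fetches it remotely), so the page itself stays static.
        with open(path, "w") as f:
            f.write("benchmarkRuns = " + json.dumps(results) + ";\n")

    write_data_file([{"label": "api_overhead SubmitKernel", "value": 12.3, "unit": "ms"}])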
---
 devops/scripts/benchmarks/benches/base.py     |  17 +-
 devops/scripts/benchmarks/benches/compute.py  | 130 ++--
 devops/scripts/benchmarks/benches/llamacpp.py |  18 +-
 .../scripts/benchmarks/benches/syclbench.py   |  36 +-
 devops/scripts/benchmarks/benches/test.py     |  17 +-
 devops/scripts/benchmarks/benches/umf.py      |  33 +-
 devops/scripts/benchmarks/benches/velocity.py |  79 ++-
 .../benchmark_results.html.template           | 192 ------
 devops/scripts/benchmarks/history.py          |  19 +-
 devops/scripts/benchmarks/html/config.js      |   5 +
 devops/scripts/benchmarks/html/index.html     | 205 +++++++
 devops/scripts/benchmarks/html/scripts.js     | 556 ++++++++++++++++++
 devops/scripts/benchmarks/main.py             |  52 +-
 devops/scripts/benchmarks/options.py          |   1 +
 devops/scripts/benchmarks/output_html.py      | 352 +----------
 devops/scripts/benchmarks/output_markdown.py  |  40 +-
 .../benchmarks/{benches => utils}/oneapi.py   |  20 +-
 .../benchmarks/{benches => utils}/result.py   |  17 +-
 devops/scripts/benchmarks/utils/utils.py      |  26 +-
 19 files changed, 1167 insertions(+), 648 deletions(-)
 delete mode 100644 devops/scripts/benchmarks/benchmark_results.html.template
 create mode 100644 devops/scripts/benchmarks/html/config.js
 create mode 100644 devops/scripts/benchmarks/html/index.html
 create mode 100644 devops/scripts/benchmarks/html/scripts.js
 rename devops/scripts/benchmarks/{benches => utils}/oneapi.py (79%)
 rename devops/scripts/benchmarks/{benches => utils}/result.py (69%)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index d1bb5fb53b83a..77365220dbf85 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -6,7 +6,7 @@
 import os
 import shutil
 from pathlib import Path
-from .result import Result
+from utils.result import Result
 from options import options
 from utils.utils import download, run
 import urllib.request
@@ -55,16 +55,25 @@ def create_data_path(self, name, skip_data_dir=False):
             data_path = os.path.join(self.directory, name)
         else:
             data_path = os.path.join(self.directory, "data", name)
-            if options.rebuild and Path(data_path).exists():
+            if options.redownload and Path(data_path).exists():
                 shutil.rmtree(data_path)
 
         Path(data_path).mkdir(parents=True, exist_ok=True)
 
         return data_path
 
-    def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False):
+    def download(
+        self,
+        name,
+        url,
+        file,
+        untar=False,
+        unzip=False,
+        skip_data_dir=False,
+        checksum="",
+    ):
         self.data_path = self.create_data_path(name, skip_data_dir)
-        return download(self.data_path, url, file, untar, unzip)
+        return download(self.data_path, url, file, untar, unzip, checksum)
 
     def name(self):
         raise NotImplementedError()
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 5e420d560a463..18ed969728902 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -8,10 +8,11 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from options import options
 from enum import Enum
 
+
 class ComputeBench(Suite):
     def __init__(self, directory):
         self.directory = directory
@@ -47,9 +48,8 @@ def setup(self):
                 f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime",
             ]
 
-        print(f"{self.__class__.__name__}: Run {configure_command}")
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j")
+
         run(f"cmake --build {build_path} -j", add_sycl=True)
 
         self.built = True
@@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]:
             ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
             ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
             VectorSum(self),
-            MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
-            MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
-            MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
-            MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
-            MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
-            MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
-            MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
-            MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
-            MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0),
-            MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
             GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
             GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
             GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
@@ -98,6 +88,16 @@ def benchmarks(self) -> list[Benchmark]:
                 SubmitKernelUR(self, 0, 0),
                 SubmitKernelUR(self, 1, 0),
                 SubmitKernelUR(self, 1, 1),
+                MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
+                MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
+                MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
+                MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
+                MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
+                MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
+                MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
+                MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
+                MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0),
+                MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
                 GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
                 GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
                 GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100),
@@ -136,6 +136,9 @@ def setup(self):
     def explicit_group(self):
         return ""
 
+    def description(self) -> str:
+        return ""
+
     def run(self, env_vars) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
@@ -167,6 +170,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
+                    description=self.description()
                 )
             )
         return ret
@@ -221,6 +225,13 @@ def bin_args(self) -> list[str]:
             "--KernelExecTime=1",
         ]
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        return (
+            f"Measures CPU time overhead of submitting {order} kernels through SYCL API."
+            "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
+        )
+
 
 class SubmitKernelUR(ComputeBenchmark):
     def __init__(self, bench, ioq, measureCompletion):
@@ -237,6 +248,15 @@ def name(self):
     def explicit_group(self):
         return "SubmitKernel"
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        completion = "including" if self.measureCompletion else "excluding"
+        return (
+            f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, "
+            f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time "
+            f"to isolate API overhead."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             f"--Ioq={self.ioq}",
@@ -261,6 +281,14 @@ def name(self):
     def explicit_group(self):
         return "SubmitKernel"
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        return (
+            f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. "
+            f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead "
+            f"from execution time."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             f"--Ioq={self.ioq}",
@@ -286,6 +314,14 @@ def name(self):
         order = "in order" if self.ioq else "out of order"
         return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL {order} queue overhead for {operation} from {self.source} to "
+            f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=100000",
@@ -309,6 +345,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL in-order queue memory copy performance for {operation} from "
+            f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -330,6 +373,12 @@ def __init__(self, bench, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures general SYCL queue memory copy performance from {self.source} to "
+            f"{self.destination} with {self.size} bytes per operation."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -349,6 +398,12 @@ def __init__(self, bench, type, size, placement):
     def name(self):
         return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
+            f"{self.size} bytes. Higher values (GB/s) indicate better performance."
+        )
+
     # measurement is in GB/s
     def lower_is_better(self):
         return False
@@ -362,6 +417,7 @@ def bin_args(self) -> list[str]:
             "--useEvents=0",
             "--contents=Zeros",
             "--multiplier=1",
+            "--vectorSize=1",
         ]
 
 
@@ -372,6 +428,12 @@ def __init__(self, bench):
     def name(self):
         return f"miscellaneous_benchmark_sycl VectorSum"
 
+    def description(self) -> str:
+        return (
+            "Measures performance of vector addition across 3D grid (512x256x256 elements) "
+            "using SYCL."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=1000",
@@ -408,6 +470,16 @@ def name(self):
             + (" without events" if not self.useEvents else "")
         )
 
+    def description(self) -> str:
+        src_type = "device" if self.srcUSM == 1 else "host"
+        dst_type = "device" if self.dstUSM == 1 else "host"
+        events = "with" if self.useEvents else "without"
+        return (
+            f"Measures multithreaded memory copy performance with {self.numThreads} threads "
+            f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
+            f"from {src_type} to {dst_type} memory {events} events."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--Ioq=1",
@@ -441,6 +513,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
     def explicit_group(self):
         return f"SinKernelGraph {self.numKernels}"
 
+    def description(self) -> str:
+        execution = "using graphs" if self.withGraphs else "without graphs"
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"sin kernels {execution}. Tests overhead and benefits of graph-based execution."
+        )
+
     def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}"
 
@@ -452,28 +531,3 @@ def bin_args(self) -> list[str]:
             "--withCopyOffload=1",
             "--immediateAppendCmdList=0",
         ]
-
-
-class GraphApiSubmitExecGraph(ComputeBenchmark):
-    def __init__(self, bench, ioq, submit, numKernels):
-        self.ioq = ioq
-        self.submit = submit
-        self.numKernels = numKernels
-        super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph")
-
-    def name(self):
-        return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}"
-
-    def explicit_group(self):
-        if self.submit:
-            return "SubmitGraph"
-        else:
-            return "ExecGraph"
-
-    def bin_args(self) -> list[str]:
-        return [
-            "--iterations=100",
-            f"--measureSubmit={self.submit}",
-            f"--ioq={self.ioq}",
-            f"--numKernels={self.numKernels}",
-        ]
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 6524c95a9f56f..d8e0ab5d007bb 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -8,10 +8,10 @@
 from pathlib import Path
 from utils.utils import download, git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 
 
@@ -43,6 +43,7 @@ def setup(self):
             self.models_dir,
             "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
             "Phi-3-mini-4k-instruct-q4.gguf",
+            checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
         )
 
         self.oneapi = get_oneapi()
@@ -62,9 +63,9 @@ def setup(self):
             f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
             f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
         ]
-        print(f"{self.__class__.__name__}: Run {configure_command}")
+
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j")
+
         run(
             f"cmake --build {self.build_path} -j",
             add_sycl=True,
@@ -92,6 +93,14 @@ def setup(self):
     def name(self):
         return f"llama.cpp"
 
+    def description(self) -> str:
+        return (
+            "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
+            "Runs both prompt processing (initial context processing) and text generation benchmarks with "
+            "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+            "quantized model and leverages SYCL with oneDNN for acceleration."
+        )
+
     def lower_is_better(self):
         return False
 
@@ -130,6 +139,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
+                    description=self.description()
                 )
             )
         return results
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index f7cf571a7ecd7..47326b2555a68 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -8,7 +8,7 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from options import options
 
 
@@ -65,14 +65,14 @@ def benchmarks(self) -> list[Benchmark]:
             DagTaskS(self),
             HostDevBandwidth(self),
             LocalMem(self),
-            Pattern_L2(self),
-            Reduction(self),
+            # Pattern_L2(self), # validation failure
+            # Reduction(self), # validation failure
             ScalarProd(self),
             SegmentReduction(self),
-            UsmAccLatency(self),
+            # UsmAccLatency(self), # validation failure
             UsmAllocLatency(self),
-            UsmInstrMix(self),
-            UsmPinnedOverhead(self),
+            # UsmInstrMix(self), # validation failure
+            # UsmPinnedOverhead(self), # validation failure
             VecAdd(self),
             # *** sycl-bench single benchmarks
             # TwoDConvolution(self), # run time < 1ms
@@ -82,20 +82,20 @@ def benchmarks(self) -> list[Benchmark]:
             Atax(self),
             # Atomic_reduction(self), # run time < 1ms
             Bicg(self),
-            Correlation(self),
-            Covariance(self),
-            Gemm(self),
-            Gesumv(self),
-            Gramschmidt(self),
+            # Correlation(self), # validation failure
+            # Covariance(self), # validation failure
+            # Gemm(self), # validation failure
+            # Gesumv(self), # validation failure
+            # Gramschmidt(self), # validation failure
             KMeans(self),
             LinRegCoeff(self),
             # LinRegError(self), # run time < 1ms
-            MatmulChain(self),
+            # MatmulChain(self), # validation failure
             MolDyn(self),
-            Mvt(self),
+            # Mvt(self), # validation failure
             Sf(self),
-            Syr2k(self),
-            Syrk(self),
+            # Syr2k(self), # validation failure
+            # Syrk(self), # validation failure
         ]
 
 
@@ -122,7 +122,7 @@ def run(self, env_vars) -> list[Result]:
         if self.done:
             return
         self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
-        print(f"{self.__class__.__name__}: Results in {self.outputfile}")
+
         command = [
             f"{self.benchmark_bin}",
             f"--warmup-run",
@@ -143,7 +143,7 @@ def run(self, env_vars) -> list[Result]:
                 if not row[0].startswith("#"):
                     res_list.append(
                         Result(
-                            label=row[0],
+                            label=f"{self.name()} {row[0]}",
                             value=float(row[12]) * 1000,  # convert to ms
                             passed=(row[1] == "PASS"),
                             command=command,
@@ -161,7 +161,7 @@ def teardown(self):
         return
 
     def name(self):
-        return self.test
+        return f"{self.bench.name()} {self.test}"
 
 
 # multi benchmarks
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 06eac12b25344..18794d4e9c73c 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -6,7 +6,7 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
 import os
@@ -19,6 +19,9 @@ def __init__(self):
     def setup(self):
         return
 
+    def name(self) -> str:
+        return "Test Suite"
+
     def benchmarks(self) -> list[Benchmark]:
         bench_configs = [
             ("Memory Bandwidth", 2000, 200, "Foo Group"),
@@ -36,18 +39,18 @@ def benchmarks(self) -> list[Benchmark]:
                 value = base_value * value_multiplier
                 diff = base_diff * value_multiplier
 
-                result.append(TestBench(name, value, diff, group))
+                result.append(TestBench(self, name, value, diff, group))
 
         return result
 
 
 class TestBench(Benchmark):
-    def __init__(self, name, value, diff, group=""):
+    def __init__(self, suite, name, value, diff, group=""):
+        super().__init__("", suite)
         self.bname = name
         self.value = value
         self.diff = diff
         self.group = group
-        super().__init__("")
 
     def name(self):
         return self.bname
@@ -58,6 +61,9 @@ def lower_is_better(self):
     def setup(self):
         return
 
+    def description(self) -> str:
+        return f"This is a test benchmark for {self.bname}."
+
     def run(self, env_vars) -> list[Result]:
         random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
         return [
@@ -65,10 +71,11 @@ def run(self, env_vars) -> list[Result]:
                 label=self.name(),
                 explicit_group=self.group,
                 value=random_value,
-                command="",
+                command=["test", "--arg1", "foo"],
                 env={"A": "B"},
                 stdout="no output",
                 unit="ms",
+                description=self.description(),
             )
         ]
 
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index 15c343b9a9845..1f736e7755f92 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -6,10 +6,10 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 import csv
 import io
@@ -22,8 +22,6 @@ def isUMFAvailable():
 class UMFSuite(Suite):
     def __init__(self, directory):
         self.directory = directory
-        if not isUMFAvailable():
-            print("UMF not provided. Related benchmarks will not run")
 
     def name(self) -> str:
         return "UMF"
@@ -40,6 +38,8 @@ def benchmarks(self) -> list[Benchmark]:
         benches = [
             GBench(self),
             GBenchUmfProxy(self),
+            GBenchJemalloc(self),
+            GBenchTbbProxy(self),
         ]
 
         return benches
@@ -220,10 +220,31 @@ def parse_output(self, output):
         return results
 
 
-class GBenchUmfProxy(GBenchPreloaded):
+class GBenchGlibc(GBenchPreloaded):
+    def __init__(self, bench, replacing_lib):
+        super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib=replacing_lib)
+
+
+class GBenchUmfProxy(GBenchGlibc):
     def __init__(self, bench):
-        super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib="umfProxy")
+        super().__init__(bench, replacing_lib="umfProxy")
 
     def extra_env_vars(self) -> dict:
         umf_proxy_path = os.path.join(options.umf, "lib", "libumf_proxy.so")
         return {"LD_PRELOAD": umf_proxy_path}
+
+
+class GBenchJemalloc(GBenchGlibc):
+    def __init__(self, bench):
+        super().__init__(bench, replacing_lib="jemalloc")
+
+    def extra_env_vars(self) -> dict:
+        return {"LD_PRELOAD": "libjemalloc.so"}
+
+
+class GBenchTbbProxy(GBenchGlibc):
+    def __init__(self, bench):
+        super().__init__(bench, replacing_lib="tbbProxy")
+
+    def extra_env_vars(self) -> dict:
+        return {"LD_PRELOAD": "libtbbmalloc_proxy.so"}
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index b7d06cbe4a3a2..be36c47ca36d5 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -7,10 +7,10 @@
 import shutil
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import shutil
 
 import os
@@ -115,6 +115,9 @@ def extra_env_vars(self) -> dict:
     def parse_output(self, stdout: str) -> float:
         raise NotImplementedError()
 
+    def description(self) -> str:
+        return ""
+
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
@@ -133,6 +136,7 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
+                description=self.description()
             )
         ]
 
@@ -147,6 +151,12 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Hashtable"
 
+    def description(self) -> str:
+        return (
+            "Measures hash table search performance using an efficient lock-free algorithm with linear probing. "
+            "Reports throughput in millions of keys processed per second. Higher values indicate better performance."
+        )
+
     def bin_args(self) -> list[str]:
         return ["--no-verify"]
 
@@ -170,6 +180,13 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Bitcracker"
 
+    def description(self) -> str:
+        return (
+            "Password-cracking application for BitLocker-encrypted memory units. "
+            "Uses dictionary attack to find user or recovery passwords. "
+            "Measures total time required to process 60000 passwords."
+        )
+
     def bin_args(self) -> list[str]:
         self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass")
 
@@ -204,11 +221,19 @@ def download_deps(self):
             "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=",
             "sobel_filter_data.tgz",
             untar=True,
+            checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66",
         )
 
     def name(self):
         return "Velocity-Bench Sobel Filter"
 
+    def description(self) -> str:
+        return (
+            "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter "
+            "to reduce edge artifacts. Processes a large 32K x 32K image and measures "
+            "the time required to apply the filter."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-i",
@@ -249,6 +274,13 @@ def run(self, env_vars) -> list[Result]:
     def name(self):
         return "Velocity-Bench QuickSilver"
 
+    def description(self) -> str:
+        return (
+            "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. "
+            "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. "
+            "Reports a figure of merit in MMS/CTT where higher values indicate better performance."
+        )
+
     def lower_is_better(self):
         return False
 
@@ -279,14 +311,22 @@ def __init__(self, vb: VelocityBench):
     def download_deps(self):
         self.download(
             "easywave",
-            "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz",
+            "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz",
             "examples.tar.gz",
             untar=True,
+            checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1",
         )
 
     def name(self):
         return "Velocity-Bench Easywave"
 
+    def description(self) -> str:
+        return (
+            "A tsunami wave simulator used for researching tsunami generation and wave propagation. "
+            "Measures the elapsed time in milliseconds to simulate a specified tsunami event "
+            "based on real-world data."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-grid",
@@ -341,6 +381,13 @@ def download_deps(self):
     def name(self):
         return "Velocity-Bench CudaSift"
 
+    def description(self) -> str:
+        return (
+            "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm "
+            "for detecting, describing, and matching local features in images. "
+            "Measures average processing time in milliseconds."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout)
         if match:
@@ -364,6 +411,7 @@ def download_deps(self):
             "cifar-10-binary.tar.gz",
             untar=True,
             skip_data_dir=True,
+            checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814",
         )
         return
 
@@ -382,6 +430,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-cifar"
 
+    def description(self) -> str:
+        return (
+            "Deep learning image classification workload based on the CIFAR-10 dataset "
+            "of 60,000 32x32 color images in 10 classes. Uses neural networks to "
+            "classify input images and measures total calculation time."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(
             r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout
@@ -407,6 +462,7 @@ def download_deps(self):
             "train-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee",
         )
         self.download(
             "datasets",
@@ -414,6 +470,7 @@ def download_deps(self):
             "train-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00",
         )
         self.download(
             "datasets",
@@ -421,6 +478,7 @@ def download_deps(self):
             "t10k-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c",
         )
         self.download(
             "datasets",
@@ -428,6 +486,7 @@ def download_deps(self):
             "t10k-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d",
         )
 
     def extra_cmake_args(self):
@@ -445,6 +504,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-mnist"
 
+    def description(self) -> str:
+        return (
+            "Digit recognition based on the MNIST database, one of the oldest and most popular "
+            "databases of handwritten digits. Uses neural networks to identify digits "
+            "and measures total calculation time."
+        )
+
     def bin_args(self):
         return ["-conv_algo", "ONEDNN_AUTO"]
 
@@ -488,6 +554,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench svm"
 
+    def description(self) -> str:
+        return (
+            "Implementation of Support Vector Machine, a popular classical machine learning technique. "
+            "Uses supervised learning models with associated algorithms to analyze data "
+            "for classification and regression analysis. Measures total elapsed time."
+        )
+
     def bin_args(self):
         return [
             f"{self.code_path}/a9a",
diff --git a/devops/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template
deleted file mode 100644
index 1deeedad66b00..0000000000000
--- a/devops/scripts/benchmarks/benchmark_results.html.template
+++ /dev/null
@@ -1,192 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <title>Benchmark Results</title>
-    <style>
-        body {
-            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-            margin: 0;
-            padding: 16px;
-            background: #f8f9fa;
-        }
-        .container {
-            max-width: 1100px;
-            margin: 0 auto;
-        }
-        h1, h2 {
-            color: #212529;
-            text-align: center;
-            margin-bottom: 24px;
-            font-weight: 500;
-        }
-        .chart {
-            background: white;
-            border-radius: 8px;
-            padding: 24px;
-            margin-bottom: 24px;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-            overflow-x: auto;
-        }
-        .chart > div {
-            min-width: 600px;
-            margin: 0 auto;
-        }
-        @media (max-width: 768px) {
-            body {
-                padding: 12px;
-            }
-            .chart {
-                padding: 16px;
-                border-radius: 6px;
-            }
-            h1 {
-                font-size: 24px;
-                margin-bottom: 16px;
-            }
-        }
-        .filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-        }
-        .filter-container input {
-            padding: 8px;
-            font-size: 16px;
-            border: 1px solid #ccc;
-            border-radius: 4px;
-            width: 400px;
-            max-width: 100%;
-        }
-        .suite-filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-            padding: 16px;
-            background: #e9ecef;
-            border-radius: 8px;
-        }
-        .suite-checkbox {
-            margin: 0 8px;
-        }
-        details {
-            margin-bottom: 24px;
-        }
-        summary {
-            font-size: 18px;
-            font-weight: 500;
-            cursor: pointer;
-            padding: 12px;
-            background: #e9ecef;
-            border-radius: 8px;
-            user-select: none;
-        }
-        summary:hover {
-            background: #dee2e6;
-        }
-    </style>
-    <script>
-        function getQueryParam(param) {
-            const urlParams = new URLSearchParams(window.location.search);
-            return urlParams.get(param);
-        }
-
-        function filterCharts() {
-            const regexInput = document.getElementById('bench-filter').value;
-            const regex = new RegExp(regexInput, 'i');
-            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
-            const charts = document.querySelectorAll('.chart');
-
-            charts.forEach(chart => {
-                const label = chart.getAttribute('data-label');
-                const suite = chart.getAttribute('data-suite');
-                if (regex.test(label) && activeSuites.includes(suite)) {
-                    chart.style.display = '';
-                } else {
-                    chart.style.display = 'none';
-                }
-            });
-
-            updateURL();
-        }
-
-        function updateURL() {
-            const url = new URL(window.location);
-            const regex = document.getElementById('bench-filter').value;
-            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
-
-            if (regex) {
-                url.searchParams.set('regex', regex);
-            } else {
-                url.searchParams.delete('regex');
-            }
-
-            if (activeSuites.length > 0) {
-                url.searchParams.set('suites', activeSuites.join(','));
-            } else {
-                url.searchParams.delete('suites');
-            }
-
-            history.replaceState(null, '', url);
-        }
-
-        document.addEventListener('DOMContentLoaded', (event) => {
-            const regexParam = getQueryParam('regex');
-            const suitesParam = getQueryParam('suites');
-
-            if (regexParam) {
-                document.getElementById('bench-filter').value = regexParam;
-            }
-
-            const suiteCheckboxes = document.querySelectorAll('.suite-checkbox');
-            if (suitesParam) {
-                const suites = suitesParam.split(',');
-                suiteCheckboxes.forEach(checkbox => {
-                    if (suites.includes(checkbox.getAttribute('data-suite'))) {
-                        checkbox.checked = true;
-                    } else {
-                        checkbox.checked = false;
-                    }
-                });
-            } else {
-                suiteCheckboxes.forEach(checkbox => {
-                    checkbox.checked = true;
-                });
-            }
-            filterCharts();
-
-            suiteCheckboxes.forEach(checkbox => {
-                checkbox.addEventListener('change', () => {
-                    filterCharts();
-                });
-            });
-
-            document.getElementById('bench-filter').addEventListener('input', () => {
-                filterCharts();
-            });
-        });
-    </script>
-</head>
-<body>
-    <div class="container">
-        <h1>Benchmark Results</h1>
-        <div class="filter-container">
-            <input type="text" id="bench-filter" placeholder="Regex...">
-        </div>
-        <div class="suite-filter-container">
-            ${suite_checkboxes_html}
-        </div>
-        <details class="timeseries">
-            <summary>Historical Results</summary>
-            <div class="charts">
-                ${timeseries_charts_html}
-            </div>
-        </details>
-        <details class="bar-charts">
-            <summary>Comparisons</summary>
-            <div class="charts">
-                ${bar_charts_html}
-            </div>
-        </details>
-    </div>
-</body>
-</html>
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 7902aa4f04c35..2bb0b9db8ea38 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -6,7 +6,7 @@
 import os
 import json
 from pathlib import Path
-from benches.result import Result, BenchmarkRun
+from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run
@@ -63,12 +63,29 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
             result = run("git rev-parse --short HEAD")
             git_hash = result.stdout.decode().strip()
+
+            # Get the GitHub repo URL from git remote
+            remote_result = run("git remote get-url origin")
+            remote_url = remote_result.stdout.decode().strip()
+
+            # Convert SSH or HTTPS URL to owner/repo format
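+            # (removesuffix strips only a literal trailing ".git"; rstrip would
+            # also eat trailing 'g'/'i'/'t' characters from the repo name)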
+            if remote_url.startswith("git@github.com:"):
+                # SSH format: git@github.com:owner/repo.git
+                github_repo = remote_url.split("git@github.com:")[1].rstrip(".git")
+            elif remote_url.startswith("https://github.com/"):
+                # HTTPS format: https://github.com/owner/repo.git
+                github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
+            else:
+                github_repo = None
+
         except:
             git_hash = "unknown"
+            github_repo = None
 
         return BenchmarkRun(
             name=name,
             git_hash=git_hash,
+            github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
         )
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
new file mode 100644
index 0000000000000..c1210b2b21da5
--- /dev/null
+++ b/devops/scripts/benchmarks/html/config.js
@@ -0,0 +1,5 @@
+const config = {
+    remoteDataUrl: ''
+};
+// defaultCompareNames = [];
+// suiteNames = [];
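+// remoteDataUrl: if set, scripts.js fetches data.js from that URL instead of
+// using the locally generated copy (see loadData() in scripts.js). Because
+// data.js is loaded before config.js, uncommenting defaultCompareNames or
+// suiteNames here overrides the values written by output_html.py.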
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
new file mode 100644
index 0000000000000..c10844f15c707
--- /dev/null
+++ b/devops/scripts/benchmarks/html/index.html
@@ -0,0 +1,205 @@
+<!--
+  Copyright (C) 2024 Intel Corporation
+  Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+  See LICENSE.TXT
+  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+-->
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Benchmark Results</title>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns"></script>
+    <script src="data.js"></script>
+    <script src="config.js"></script>
+    <script src="scripts.js"></script>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            margin: 0;
+            padding: 16px;
+            background: #f8f9fa;
+        }
+        .container {
+            max-width: 1100px;
+            margin: 0 auto;
+        }
+        h1, h2 {
+            color: #212529;
+            text-align: center;
+            margin-bottom: 24px;
+            font-weight: 500;
+        }
+        .chart-container {
+            background: white;
+            border-radius: 8px;
+            padding: 24px;
+            margin-bottom: 24px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        }
+        @media (max-width: 768px) {
+            body {
+                padding: 12px;
+            }
+            .chart-container {
+                padding: 16px;
+                border-radius: 6px;
+            }
+            h1 {
+                font-size: 24px;
+                margin-bottom: 16px;
+            }
+        }
+        .filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+        }
+        .filter-container input {
+            padding: 8px;
+            font-size: 16px;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            width: 400px;
+            max-width: 100%;
+        }
+        .suite-filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .suite-checkbox {
+            margin: 0 8px;
+        }
+        details {
+            margin-bottom: 24px;
+        }
+        summary {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            font-size: 16px;
+            font-weight: 500;
+            cursor: pointer;
+            padding: 8px;
+            background: #e9ecef;
+            border-radius: 8px;
+            user-select: none;
+        }
+        summary:hover {
+            background: #dee2e6;
+        }
+        .extra-info {
+            padding: 8px;
+            background: #f8f9fa;
+            border-radius: 8px;
+            margin-top: 8px;
+        }
+        .run-selector {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .run-selector select {
+            width: 300px;
+            padding: 8px;
+            margin-right: 8px;
+        }
+        .run-selector button {
+            padding: 8px 16px;
+            background: #0068B5;
+            color: white;
+            border: none;
+            border-radius: 4px;
+            cursor: pointer;
+        }
+        .run-selector button:hover {
+            background: #00C7FD;
+        }
+        .selected-runs {
+            margin-top: 12px;
+        }
+        .selected-run {
+            display: inline-block;
+            padding: 4px 8px;
+            margin: 4px;
+            background: #e2e6ea;
+            border-radius: 4px;
+        }
+        .selected-run button {
+            margin-left: 8px;
+            padding: 0 4px;
+            background: none;
+            border: none;
+            color: #dc3545;
+            cursor: pointer;
+        }
+        .download-button {
+            background: none;
+            border: none;
+            color: #0068B5;
+            cursor: pointer;
+            font-size: 16px;
+            padding: 4px;
+            margin-left: 8px;
+        }
+        .download-button:hover {
+            color: #00C7FD;
+        }
+        .loading-indicator {
+            text-align: center;
+            font-size: 18px;
+            color: #0068B5;
+            margin-bottom: 20px;
+        }
+        .extra-info-entry {
+            border: 1px solid #ddd;
+            padding: 10px;
+            margin-bottom: 10px;
+            background-color: #f9f9f9;
+            border-radius: 5px;
+        }
+        .extra-info-entry strong {
+            display: block;
+            margin-bottom: 5px;
+        }
+        .extra-info-entry em {
+            color: #555;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Benchmark Results</h1>
+        <div id="loading-indicator" class="loading-indicator" style="display: none;">
+            Loading data, please wait...
+        </div>
+        <div class="filter-container">
+            <input type="text" id="bench-filter" placeholder="Regex...">
+        </div>
+        <div class="suite-filter-container" id="suite-filters">
+            <!-- Suite checkboxes will be generated by JavaScript -->
+        </div>
+        <div class="run-selector">
+            <select id="run-select">
+                <option value="">Select a run to compare...</option>
+            </select>
+            <button onclick="addSelectedRun()">Add</button>
+            <div id="selected-runs" class="selected-runs"></div>
+        </div>
+        <details class="timeseries" open>
+            <summary>Historical Results</summary>
+            <div class="charts"></div>
+        </details>
+        <details class="bar-charts" open>
+            <summary>Comparisons</summary>
+            <div class="charts"></div>
+        </details>
+    </div>
+</body>
+</html>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
new file mode 100644
index 0000000000000..8f0272048136d
--- /dev/null
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -0,0 +1,556 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Core state
+let activeRuns = new Set(defaultCompareNames);
+let chartInstances = new Map();
+let timeseriesData, barChartsData, allRunNames;
+
+// DOM Elements
+let runSelect, selectedRunsDiv, suiteFiltersContainer;
+
+// Run selector functions
+function updateSelectedRuns() {
+    selectedRunsDiv.innerHTML = '';
+    activeRuns.forEach(name => {
+        selectedRunsDiv.appendChild(createRunElement(name));
+    });
+    updateCharts();
+}
+
+function createRunElement(name) {
+    const runElement = document.createElement('span');
+    runElement.className = 'selected-run';
+    runElement.innerHTML = `${name} <button onclick="removeRun('${name}')">X</button>`;
+    return runElement;
+}
+
+function addSelectedRun() {
+    const selectedRun = runSelect.value;
+    if (selectedRun && !activeRuns.has(selectedRun)) {
+        activeRuns.add(selectedRun);
+        updateSelectedRuns();
+    }
+}
+
+function removeRun(name) {
+    activeRuns.delete(name);
+    updateSelectedRuns();
+}
+
+// Chart creation and update
+function createChart(data, containerId, type) {
+    if (chartInstances.has(containerId)) {
+        chartInstances.get(containerId).destroy();
+    }
+
+    const ctx = document.getElementById(containerId).getContext('2d');
+    const options = {
+        responsive: true,
+        plugins: {
+            title: {
+                display: true,
+                text: data.label
+            },
+            subtitle: {
+                display: true,
+                text: data.lower_is_better ? "Lower is better" : "Higher is better"
+            },
+            tooltip: {
+                callbacks: {
+                    label: (context) => {
+                        if (type === 'time') {
+                            const point = context.raw;
+                            return [
+                                `${data.label}:`,
+                                `Value: ${point.y.toFixed(2)} ${data.unit}`,
+                                `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`,
+                                `Git Hash: ${point.gitHash}`,
+                            ];
+                        } else {
+                            return [`${context.dataset.label}:`,
+                                `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`,
+                            ];
+                        }
+                    }
+                }
+            }
+        },
+        scales: {
+            y: {
+                title: {
+                    display: true,
+                    text: data.unit
+                }
+            }
+        }
+    };
+
+    if (type === 'time') {
+        options.interaction = {
+            mode: 'nearest',
+            intersect: false
+        };
+        options.onClick = (event, elements) => {
+            if (elements.length > 0) {
+                const point = elements[0].element.$context.raw;
+                if (point.gitHash && point.gitRepo) {
+                    window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank');
+                }
+            }
+        };
+        options.scales.x = {
+            type: 'time',
+            ticks: {
+                maxRotation: 45,
+                minRotation: 45,
+                autoSkip: true,
+                maxTicksLimit: 10
+            }
+        };
+    }
+
+    const chartConfig = {
+        type: type === 'time' ? 'line' : 'bar',
+        data: type === 'time' ?
+            {
+                datasets: createTimeseriesDatasets(data)
+            } :
+            {
+                labels: data.labels,
+                datasets: data.datasets
+            },
+        options: options
+    };
+
+    const chart = new Chart(ctx, chartConfig);
+    chartInstances.set(containerId, chart);
+    return chart;
+}
+
+function createTimeseriesDatasets(data) {
+    return Object.entries(data.runs).map(([name, points]) => ({
+        label: name,
+        data: points.map(p => ({
+            x: new Date(p.date),
+            y: p.value,
+            gitHash: p.git_hash,
+            gitRepo: p.github_repo,
+            stddev: p.stddev
+        })),
+        borderWidth: 1,
+        pointRadius: 3,
+        pointStyle: 'circle',
+        pointHoverRadius: 5
+    }));
+}
+
+function updateCharts() {
+    // Filter data by active runs
+    const filteredTimeseriesData = timeseriesData.map(chart => ({
+        ...chart,
+        runs: Object.fromEntries(
+            Object.entries(chart.runs).filter(([name]) => activeRuns.has(name))
+        )
+    }));
+
+    const filteredBarChartsData = barChartsData.map(chart => ({
+        ...chart,
+        labels: chart.labels.filter(label => activeRuns.has(label)),
+        datasets: chart.datasets.map(dataset => ({
+            ...dataset,
+            data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i]))
+        }))
+    }));
+
+    // Draw charts with filtered data
+    drawCharts(filteredTimeseriesData, filteredBarChartsData);
+}
+
+function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
+    // Clear existing charts
+    document.querySelectorAll('.charts').forEach(container => container.innerHTML = '');
+    chartInstances.forEach(chart => chart.destroy());
+    chartInstances.clear();
+
+    // Create timeseries charts
+    filteredTimeseriesData.forEach((data, index) => {
+        const containerId = `timeseries-${index}`;
+        const container = createChartContainer(data, containerId);
+        document.querySelector('.timeseries .charts').appendChild(container);
+        createChart(data, containerId, 'time');
+    });
+
+    // Create bar charts
+    filteredBarChartsData.forEach((data, index) => {
+        const containerId = `barchart-${index}`;
+        const container = createChartContainer(data, containerId);
+        document.querySelector('.bar-charts .charts').appendChild(container);
+        createChart(data, containerId, 'bar');
+    });
+
+    // Apply current filters
+    filterCharts();
+}
+
+function createChartContainer(data, canvasId) {
+    const container = document.createElement('div');
+    container.className = 'chart-container';
+    container.setAttribute('data-label', data.label);
+    container.setAttribute('data-suite', data.suite);
+
+    const canvas = document.createElement('canvas');
+    canvas.id = canvasId;
+    container.appendChild(canvas);
+
+    // Create details section for extra info
+    const details = document.createElement('details');
+    const summary = document.createElement('summary');
+    summary.textContent = "Details";
+
+    // Add subtle download button to the summary
+    const downloadButton = document.createElement('button');
+    downloadButton.className = 'download-button';
+    downloadButton.textContent = 'Download';
+    downloadButton.onclick = (event) => {
+        event.stopPropagation(); // Prevent details toggle
+        downloadChart(canvasId, data.label);
+    };
+    summary.appendChild(downloadButton);
+    details.appendChild(summary);
+
+    // Create and append extra info
+    const extraInfo = document.createElement('div');
+    extraInfo.className = 'extra-info';
+    extraInfo.innerHTML = generateExtraInfo(data);
+    details.appendChild(extraInfo);
+
+    container.appendChild(details);
+
+    return container;
+}
+
+// Pre-compute a lookup for the latest run per label
+function createLatestRunsLookup(benchmarkRuns) {
+    const latestRunsMap = new Map();
+
+    benchmarkRuns.forEach(run => {
+        // Yes, we need to convert the date every time. I checked.
+        const runDate = new Date(run.date);
+        run.results.forEach(result => {
+            const label = result.label;
+            if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) {
+                latestRunsMap.set(label, {
+                    run,
+                    result
+                });
+            }
+        });
+    });
+
+    return latestRunsMap;
+}
+const latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
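+// benchmarkRuns itself is defined by data.js (generated by output_html.py) or by
+// the remote script fetched in loadData() below.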
+
+function generateExtraInfo(data) {
+    const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
+
+    return labels.map(label => {
+        const latestRun = latestRunsLookup.get(label);
+
+        if (latestRun) {
+            return `<div class="extra-info-entry">
+                        <strong>${label}:</strong> ${formatCommand(latestRun.result)}<br>
+                        <em>Description:</em> ${latestRun.result.description}
+                    </div>`;
+        }
+        return `<div class="extra-info-entry">
+                        <strong>${label}:</strong> No data available
+                </div>`;
+    }).join('');
+}
+
+function formatCommand(run) {
+    const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' ');
+    let command = run.command ? [...run.command] : [];
+
+    return `${envVars} ${command.join(' ')}`.trim();
+}
+
+function downloadChart(canvasId, label) {
+    const chart = chartInstances.get(canvasId);
+    if (chart) {
+        const link = document.createElement('a');
+        link.href = chart.toBase64Image('image/jpeg', 1);
+        link.download = `${label}.jpg`;
+        link.click();
+    }
+}
+
+// URL and filtering functions
+function getQueryParam(param) {
+    const urlParams = new URLSearchParams(window.location.search);
+    return urlParams.get(param);
+}
+
+function updateURL() {
+    const url = new URL(window.location);
+    const regex = document.getElementById('bench-filter').value;
+    const activeSuites = getActiveSuites();
+    const activeRunsList = Array.from(activeRuns);
+
+    if (regex) {
+        url.searchParams.set('regex', regex);
+    } else {
+        url.searchParams.delete('regex');
+    }
+
+    if (activeSuites.length > 0) {
+        url.searchParams.set('suites', activeSuites.join(','));
+    } else {
+        url.searchParams.delete('suites');
+    }
+
+    // Handle the runs parameter
+    if (activeRunsList.length > 0) {
+        // Check if the active runs are the same as default runs
+        const defaultRuns = new Set(defaultCompareNames || []);
+        const isDefaultRuns = activeRunsList.length === defaultRuns.size &&
+            activeRunsList.every(run => defaultRuns.has(run));
+
+        if (isDefaultRuns) {
+            // If it's just the default runs, omit the parameter entirely
+            url.searchParams.delete('runs');
+        } else {
+            url.searchParams.set('runs', activeRunsList.join(','));
+        }
+    } else {
+        url.searchParams.delete('runs');
+    }
+
+    history.replaceState(null, '', url);
+}
+
+function filterCharts() {
+    const regexInput = document.getElementById('bench-filter').value;
+    const regex = new RegExp(regexInput, 'i');
+    const activeSuites = getActiveSuites();
+
+    document.querySelectorAll('.chart-container').forEach(container => {
+        const label = container.getAttribute('data-label');
+        const suite = container.getAttribute('data-suite');
+        container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none';
+    });
+
+    updateURL();
+}
+
+function getActiveSuites() {
+    return Array.from(document.querySelectorAll('.suite-checkbox:checked'))
+        .map(checkbox => checkbox.getAttribute('data-suite'));
+}
+
+// Data processing
+function processTimeseriesData(benchmarkRuns) {
+    const resultsByLabel = {};
+
+    benchmarkRuns.forEach(run => {
+        const runDate = run.date ? new Date(run.date) : null;
+        run.results.forEach(result => {
+            if (!resultsByLabel[result.label]) {
+                resultsByLabel[result.label] = {
+                    label: result.label,
+                    suite: result.suite,
+                    unit: result.unit,
+                    lower_is_better: result.lower_is_better,
+                    runs: {}
+                };
+            }
+
+            if (!resultsByLabel[result.label].runs[run.name]) {
+                resultsByLabel[result.label].runs[run.name] = [];
+            }
+
+            resultsByLabel[result.label].runs[run.name].push({
+                date: runDate,
+                value: result.value,
+                stddev: result.stddev,
+                git_hash: run.git_hash,
+                github_repo: run.github_repo
+            });
+        });
+    });
+
+    return Object.values(resultsByLabel);
+}
+
+function processBarChartsData(benchmarkRuns) {
+    const groupedResults = {};
+
+    benchmarkRuns.forEach(run => {
+        run.results.forEach(result => {
+            if (!result.explicit_group) return;
+
+            if (!groupedResults[result.explicit_group]) {
+                groupedResults[result.explicit_group] = {
+                    label: result.explicit_group,
+                    suite: result.suite,
+                    unit: result.unit,
+                    lower_is_better: result.lower_is_better,
+                    labels: [],
+                    datasets: []
+                };
+            }
+
+            const group = groupedResults[result.explicit_group];
+
+            if (!group.labels.includes(run.name)) {
+                group.labels.push(run.name);
+            }
+
+            let dataset = group.datasets.find(d => d.label === result.label);
+            if (!dataset) {
+                dataset = {
+                    label: result.label,
+                    data: new Array(group.labels.length).fill(null)
+                };
+                group.datasets.push(dataset);
+            }
+
+            const runIndex = group.labels.indexOf(run.name);
+            dataset.data[runIndex] = result.value;
+        });
+    });
+
+    return Object.values(groupedResults);
+}
+
+// Setup functions
+function setupRunSelector() {
+    runSelect = document.getElementById('run-select');
+    selectedRunsDiv = document.getElementById('selected-runs');
+
+    allRunNames.forEach(name => {
+        const option = document.createElement('option');
+        option.value = name;
+        option.textContent = name;
+        runSelect.appendChild(option);
+    });
+
+    updateSelectedRuns();
+}
+
+function setupSuiteFilters() {
+    suiteFiltersContainer = document.getElementById('suite-filters');
+
+    suiteNames.forEach(suite => {
+        const label = document.createElement('label');
+        const checkbox = document.createElement('input');
+        checkbox.type = 'checkbox';
+        checkbox.className = 'suite-checkbox';
+        checkbox.dataset.suite = suite;
+        checkbox.checked = true;
+        label.appendChild(checkbox);
+        label.appendChild(document.createTextNode(' ' + suite));
+        suiteFiltersContainer.appendChild(label);
+        suiteFiltersContainer.appendChild(document.createTextNode(' '));
+    });
+}
+
+function initializeCharts() {
+    // Process raw data
+    timeseriesData = processTimeseriesData(benchmarkRuns);
+    barChartsData = processBarChartsData(benchmarkRuns);
+    allRunNames = [...new Set(benchmarkRuns.map(run => run.name))];
+
+    // Set up active runs
+    const runsParam = getQueryParam('runs');
+    if (runsParam) {
+        const runsFromUrl = runsParam.split(',');
+
+        // Start with an empty set
+        activeRuns = new Set();
+
+        // Process each run from URL
+        runsFromUrl.forEach(run => {
+            if (run === 'default') {
+                // Special case: include all default runs
+                (defaultCompareNames || []).forEach(defaultRun => {
+                    if (allRunNames.includes(defaultRun)) {
+                        activeRuns.add(defaultRun);
+                    }
+                });
+            } else if (allRunNames.includes(run)) {
+                // Add the specific run if it exists
+                activeRuns.add(run);
+            }
+        });
+    } else {
+        // No runs parameter, use defaults
+        activeRuns = new Set(defaultCompareNames || []);
+    }
+
+    // Setup UI components
+    setupRunSelector();
+    setupSuiteFilters();
+
+    // Apply URL parameters
+    const regexParam = getQueryParam('regex');
+    const suitesParam = getQueryParam('suites');
+
+    if (regexParam) {
+        document.getElementById('bench-filter').value = regexParam;
+    }
+
+    if (suitesParam) {
+        const suites = suitesParam.split(',');
+        document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+            checkbox.checked = suites.includes(checkbox.getAttribute('data-suite'));
+        });
+    }
+
+    // Setup event listeners
+    document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+        checkbox.addEventListener('change', filterCharts);
+    });
+    document.getElementById('bench-filter').addEventListener('input', filterCharts);
+
+    // Draw initial charts
+    updateCharts();
+}
+
+// Make functions available globally for onclick handlers
+window.addSelectedRun = addSelectedRun;
+window.removeRun = removeRun;
+
+// Load data based on configuration
+function loadData() {
+    const loadingIndicator = document.getElementById('loading-indicator');
+    loadingIndicator.style.display = 'block'; // Show loading indicator
+
+    if (config.remoteDataUrl && config.remoteDataUrl !== '') {
+        // Fetch data from remote URL
+        fetch(config.remoteDataUrl)
+            .then(response => response.text())
+            .then(scriptContent => {
+                // Evaluate the script content
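+                // (the fetched script is expected to define benchmarkRuns, and
+                // optionally defaultCompareNames/suiteNames, in global scope)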
+                eval(scriptContent);
+                initializeCharts();
+            })
+            .catch(error => console.error('Error fetching remote data:', error))
+            .finally(() => {
+                loadingIndicator.style.display = 'none'; // Hide loading indicator
+            });
+    } else {
+        // Use local data
+        initializeCharts();
+        loadingIndicator.style.display = 'none'; // Hide loading indicator
+    }
+}
+
+// Initialize when DOM is ready
+document.addEventListener('DOMContentLoaded', () => {
+    loadData();
+});
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 4ad90b39b9001..8f5330d7b4f62 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -27,23 +27,27 @@
 
 
 def run_iterations(
-    benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]]
+    benchmark: Benchmark,
+    env_vars,
+    iters: int,
+    results: dict[str, list[Result]],
+    failures: dict[str, str],
 ):
     for iter in range(iters):
-        print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True)
+        print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
         bench_results = benchmark.run(env_vars)
         if bench_results is None:
-            print(f"did not finish (OK for sycl-bench).")
+            failures[benchmark.name()] = "benchmark produced no results!"
             break
 
         for bench_result in bench_results:
-            # TODO: report failures in markdown/html ?
             if not bench_result.passed:
-                print(f"complete ({bench_result.label}: verification FAILED)")
+                failures[bench_result.label] = "verification failed"
+                print(f"complete ({bench_result.label}: verification failed).")
                 continue
 
             print(
-                f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
+                f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
             )
 
             bench_result.name = bench_result.label
@@ -156,6 +160,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     )
 
     benchmarks = []
+    failures = {}
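+    # Maps a benchmark or suite name to a human-readable failure reason; rendered
+    # as a table by generate_failures_section() in output_markdown.py.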
 
     for s in suites:
         suite_benchmarks = s.benchmarks()
@@ -170,7 +175,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             print(f"Setting up {type(s).__name__}")
             try:
                 s.setup()
-            except:
+            except Exception as e:
+                failures[s.name()] = f"Suite setup failure: {e}"
                 print(f"{type(s).__name__} setup failed. Benchmarks won't be added.")
             else:
                 print(f"{type(s).__name__} setup complete.")
@@ -189,6 +195,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark setup failure: {e}"
                 print(f"failed: {e}")
 
     results = []
@@ -199,7 +206,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             processed: list[Result] = []
             for _ in range(options.iterations_stddev):
                 run_iterations(
-                    benchmark, merged_env_vars, options.iterations, intermediate_results
+                    benchmark,
+                    merged_env_vars,
+                    options.iterations,
+                    intermediate_results,
+                    failures,
                 )
                 valid, processed = process_results(
                     intermediate_results, benchmark.stddev_threshold()
@@ -211,12 +222,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark run failure: {e}"
                 print(f"failed: {e}")
 
     for benchmark in benchmarks:
-        print(f"tearing down {benchmark.name()}... ", end="", flush=True)
+        # this never has any useful information anyway, so hide it behind verbose
+        if options.verbose:
+            print(f"tearing down {benchmark.name()}... ", flush=True)
         benchmark.teardown()
-        print("complete.")
+        if options.verbose:
+            print("{benchmark.name()} teardown complete.")
 
     this_name = options.current_run_name
     chart_data = {}
@@ -241,7 +256,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
 
     if options.output_markdown:
         markdown_content = generate_markdown(
-            this_name, chart_data, options.output_markdown
+            this_name, chart_data, failures, options.output_markdown
         )
 
         with open("benchmark_results.md", "w") as file:
@@ -262,14 +277,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             compare_names.append(saved_name)
 
     if options.output_html:
-        html_content = generate_html(history.runs, "intel/llvm", compare_names)
+        generate_html(history.runs, compare_names)
 
-        with open("benchmark_results.html", "w") as file:
-            file.write(html_content)
-
-        print(
-            f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html"
-        )
+        print(f"See {os.getcwd()}/html/index.html for the results.")
 
 
 def validate_and_parse_env_args(env_args):
@@ -305,6 +315,11 @@ def validate_and_parse_env_args(env_args):
         help="Do not rebuild the benchmarks from scratch.",
         action="store_true",
     )
+    parser.add_argument(
+        "--redownload",
+        help="Always download benchmark data dependencies, even if they already exist.",
+        action="store_true",
+    )
     parser.add_argument(
         "--env",
         type=str,
@@ -430,6 +445,7 @@ def validate_and_parse_env_args(env_args):
     options.workdir = args.benchmark_directory
     options.verbose = args.verbose
     options.rebuild = not args.no_rebuild
+    options.redownload = args.redownload
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 2e92675264544..206ca94eb0d0b 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -21,6 +21,7 @@ class Options:
     ur_adapter: str = None
     umf: str = None
     rebuild: bool = True
+    redownload: bool = False
     benchmark_cwd: str = "INVALID"
     timeout: float = 600
     iterations: int = 3
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 4ba395bc3aac6..e6e3212dbcdb2 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -3,338 +3,36 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import re
+import json
 import os
-from pathlib import Path
-import matplotlib.pyplot as plt
-import mpld3
-from collections import defaultdict
-from dataclasses import dataclass
-import matplotlib.dates as mdates
-from benches.result import BenchmarkRun, Result
-import numpy as np
-from string import Template
 
 
-@dataclass
-class BenchmarkMetadata:
-    unit: str
-    suite: str
-    lower_is_better: bool
+def generate_html(benchmark_runs: list, compare_names: list[str]):
 
+    # Get unique suite names
+    suite_names = {result.suite for run in benchmark_runs for result in run.results}
 
-@dataclass
-class BenchmarkSeries:
-    label: str
-    metadata: BenchmarkMetadata
-    runs: list[BenchmarkRun]
+    # create path to data.js in html folder
+    data_path = os.path.join(os.path.dirname(__file__), "html", "data.js")
 
+    # Write the data to a .js file rather than standalone json: index.html pulls
+    # it in with a plain <script src="data.js"> tag, which also works when the
+    # page is opened directly from the local filesystem.
+    with open(data_path, "w") as f:
+        f.write("const benchmarkRuns = [\n")
+        # it might be tempting to just build a list and convert it to json, but
+        # each run is already json-encoded, so that would serialize the data twice.
+        for i, run in enumerate(benchmark_runs):
+            if i > 0:
+                f.write(",\n")
+            f.write(run.to_json())
 
-@dataclass
-class BenchmarkChart:
-    label: str
-    suite: str
-    html: str
+        f.write("\n];\n\n")  # terminates benchmarkRuns
 
-
-def tooltip_css() -> str:
-    return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}"
-
-
-def create_time_series_chart(
-    benchmarks: list[BenchmarkSeries], github_repo: str
-) -> list[BenchmarkChart]:
-    plt.close("all")
-
-    num_benchmarks = len(benchmarks)
-    if num_benchmarks == 0:
-        return []
-
-    html_charts = []
-
-    for _, benchmark in enumerate(benchmarks):
-        fig, ax = plt.subplots(figsize=(10, 4))
-
-        all_values = []
-        all_stddevs = []
-
-        for run in benchmark.runs:
-            sorted_points = sorted(run.results, key=lambda x: x.date)
-            dates = [point.date for point in sorted_points]
-            values = [point.value for point in sorted_points]
-            stddevs = [point.stddev for point in sorted_points]
-
-            all_values.extend(values)
-            all_stddevs.extend(stddevs)
-
-            ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5)
-            scatter = ax.scatter(dates, values, picker=True)
-
-            tooltip_labels = [
-                f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
-                f"Value: {point.value:.2f} {benchmark.metadata.unit}\n"
-                f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n"
-                f"Git Hash: {point.git_hash}"
-                for point in sorted_points
-            ]
-
-            targets = [
-                f"https://github.com/{github_repo}/commit/{point.git_hash}"
-                for point in sorted_points
-            ]
-
-            tooltip = mpld3.plugins.PointHTMLTooltip(
-                scatter, tooltip_labels, css=tooltip_css(), targets=targets
-            )
-            mpld3.plugins.connect(fig, tooltip)
-
-        ax.set_title(benchmark.label, pad=20)
-        performance_indicator = (
-            "lower is better"
-            if benchmark.metadata.lower_is_better
-            else "higher is better"
-        )
-        ax.text(
-            0.5,
-            1.05,
-            f"({performance_indicator})",
-            ha="center",
-            transform=ax.transAxes,
-            style="italic",
-            fontsize=7,
-            color="#666666",
-        )
-
-        ax.set_xlabel("")
-        unit = benchmark.metadata.unit
-        ax.set_ylabel(f"Value ({unit})" if unit else "Value")
-        ax.grid(True, alpha=0.2)
-        ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
-        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d %H:%M:%S"))
-
-        plt.tight_layout()
-        html_charts.append(
-            BenchmarkChart(
-                html=mpld3.fig_to_html(fig),
-                label=benchmark.label,
-                suite=benchmark.metadata.suite,
-            )
-        )
-        plt.close(fig)
-
-    return html_charts
-
-
-@dataclass
-class ExplicitGroup:
-    name: str
-    nnames: int
-    metadata: BenchmarkMetadata
-    runs: dict[str, dict[str, Result]]
-
-
-def create_explicit_groups(
-    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[ExplicitGroup]:
-    groups = {}
-
-    for run in benchmark_runs:
-        if run.name in compare_names:
-            for res in run.results:
-                if res.explicit_group != "":
-                    if res.explicit_group not in groups:
-                        groups[res.explicit_group] = ExplicitGroup(
-                            name=res.explicit_group,
-                            nnames=len(compare_names),
-                            metadata=BenchmarkMetadata(
-                                unit=res.unit,
-                                lower_is_better=res.lower_is_better,
-                                suite=res.suite,
-                            ),
-                            runs={},
-                        )
-
-                    group = groups[res.explicit_group]
-                    if res.label not in group.runs:
-                        group.runs[res.label] = {name: None for name in compare_names}
-
-                    if group.runs[res.label][run.name] is None:
-                        group.runs[res.label][run.name] = res
-
-    return list(groups.values())
-
-
-def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
-    plt.close("all")
-
-    html_charts = []
-
-    for group in groups:
-        fig, ax = plt.subplots(figsize=(10, 6))
-
-        x = np.arange(group.nnames)
-        x_labels = []
-        width = 0.8 / len(group.runs)
-
-        max_height = 0
-
-        for i, (run_name, run_results) in enumerate(group.runs.items()):
-            offset = width * i
-
-            positions = x + offset
-            x_labels = run_results.keys()
-            valid_data = [r.value if r is not None else 0 for r in run_results.values()]
-            rects = ax.bar(positions, valid_data, width, label=run_name)
-            # This is a hack to disable all bar_label. Setting labels to empty doesn't work.
-            # We create our own labels below for each bar, this works better in mpld3.
-            ax.bar_label(rects, fmt="")
-
-            for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
-                if res is None:
-                    continue
-
-                height = rect.get_height()
-                if height > max_height:
-                    max_height = height
-
-                ax.text(
-                    rect.get_x() + rect.get_width() / 2.0,
-                    height + 1,
-                    f"{res.value:.1f}",
-                    ha="center",
-                    va="bottom",
-                    fontsize=9,
-                )
-
-                tooltip_labels = [
-                    f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
-                    f"Run: {run}\n"
-                    f"Label: {res.label}\n"
-                    f"Value: {res.value:.2f} {res.unit}\n"
-                    f"Stddev: {res.stddev:.2f} {res.unit}\n"
-                ]
-                tooltip = mpld3.plugins.LineHTMLTooltip(
-                    rect, tooltip_labels, css=tooltip_css()
-                )
-                mpld3.plugins.connect(ax.figure, tooltip)
-
-        # normally we'd just set legend to be outside
-        # the chart, but this is not supported by mpld3.
-        # instead, we adjust the y axis to account for
-        # the height of the bars.
-        legend_height = len(group.runs) * 0.1
-        ax.set_ylim(0, max_height * (1 + legend_height))
-
-        ax.set_xticks([])
-        ax.grid(True, axis="y", alpha=0.2)
-        ax.set_ylabel(f"Value ({group.metadata.unit})")
-        ax.legend(loc="upper left")
-        ax.set_title(group.name, pad=20)
-        performance_indicator = (
-            "lower is better" if group.metadata.lower_is_better else "higher is better"
-        )
-        ax.text(
-            0.5,
-            1.03,
-            f"({performance_indicator})",
-            ha="center",
-            transform=ax.transAxes,
-            style="italic",
-            fontsize=7,
-            color="#666666",
-        )
-
-        for idx, label in enumerate(x_labels):
-            # this is a hack to get labels to show above the legend
-            # we normalize the idx to transAxes transform and offset it a little.
-            x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (
-                ax.get_xlim()[1] - ax.get_xlim()[0]
-            )
-            ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666")
-
-        plt.tight_layout()
-        html_charts.append(
-            BenchmarkChart(
-                label=group.name,
-                html=mpld3.fig_to_html(fig),
-                suite=group.metadata.suite,
-            )
-        )
-        plt.close(fig)
-
-    return html_charts
-
-
-def process_benchmark_data(
-    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[BenchmarkSeries]:
-    benchmark_metadata: dict[str, BenchmarkMetadata] = {}
-    run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list))
-
-    for run in benchmark_runs:
-        if run.name not in compare_names:
-            continue
-
-        for result in run.results:
-            if result.label not in benchmark_metadata:
-                benchmark_metadata[result.label] = BenchmarkMetadata(
-                    unit=result.unit,
-                    lower_is_better=result.lower_is_better,
-                    suite=result.suite,
-                )
-
-            result.date = run.date
-            result.git_hash = run.git_hash
-            run_map[result.label][run.name].append(result)
-
-    benchmark_series = []
-    for label, metadata in benchmark_metadata.items():
-        runs = [
-            BenchmarkRun(name=run_name, results=results)
-            for run_name, results in run_map[label].items()
-        ]
-        benchmark_series.append(
-            BenchmarkSeries(label=label, metadata=metadata, runs=runs)
-        )
-
-    return benchmark_series
-
-
-def generate_html(
-    benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]
-) -> str:
-    benchmarks = process_benchmark_data(benchmark_runs, compare_names)
-
-    timeseries = create_time_series_chart(benchmarks, github_repo)
-    timeseries_charts_html = "\n".join(
-        f'<div class="chart" data-label="{ts.label}" data-suite="{ts.suite}"><div>{ts.html}</div></div>'
-        for ts in timeseries
-    )
-
-    explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
-
-    bar_charts = create_grouped_bar_charts(explicit_groups)
-    bar_charts_html = "\n".join(
-        f'<div class="chart" data-label="{bc.label}" data-suite="{bc.suite}"><div>{bc.html}</div></div>'
-        for bc in bar_charts
-    )
-
-    suite_names = {t.suite for t in timeseries}
-    suite_checkboxes_html = " ".join(
-        f'<label><input type="checkbox" class="suite-checkbox" data-suite="{suite}" checked> {suite}</label>'
-        for suite in suite_names
-    )
-
-    script_path = os.path.dirname(os.path.realpath(__file__))
-    results_template_path = Path(script_path, "benchmark_results.html.template")
-    with open(results_template_path, "r") as file:
-        html_template = file.read()
-
-    template = Template(html_template)
-    data = {
-        "suite_checkboxes_html": suite_checkboxes_html,
-        "timeseries_charts_html": timeseries_charts_html,
-        "bar_charts_html": bar_charts_html,
-    }
-
-    return template.substitute(data)
+        # these are not const because they might be modified
+        # in config.js
+        f.write("defaultCompareNames = ")
+        json.dump(compare_names, f)
+        f.write(";\n\n")  # terminates defaultCompareNames
+        f.write("suiteNames = ")
+        json.dump(list(suite_names), f)
+        f.write(";")  # terminates suiteNames
diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
index dd6711cec6365..18b5779473a75 100644
--- a/devops/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections
-from benches.result import Result
+from utils.result import Result
 from options import options, MarkdownSize
 import ast
 
@@ -138,17 +138,6 @@ def generate_markdown_details(
         env_dict = res.env
         command = res.command
 
-        # If data is collected from already saved results,
-        # the content is parsed as strings
-        if isinstance(res.env, str):
-            # Since the scripts would be used solely on data prepared
-            # by our scripts, this should be safe
-            # However, maybe needs an additional blessing
-            # https://docs.python.org/3/library/ast.html#ast.literal_eval
-            env_dict = ast.literal_eval(res.env)
-        if isinstance(res.command, str):
-            command = ast.literal_eval(res.command)
-
         section = (
             "\n<details>\n"
             f"<summary>{res.label}</summary>\n\n"
@@ -179,7 +168,7 @@ def generate_markdown_details(
             return "\nBenchmark details contain too many chars to display\n"
 
 
-def generate_summary_table_and_chart(
+def generate_summary_table(
     chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize
 ):
     summary_table = get_chart_markdown_header(
@@ -374,10 +363,27 @@ def generate_summary_table_and_chart(
                 return "\n# Summary\n" "Benchmark output is too large to display\n\n"
 
 
+def generate_failures_section(failures: dict[str, str]) -> str:
+    if not failures:
+        return ""
+
+    section = "\n# Failures\n"
+    section += "| Name | Failure |\n"
+    section += "|---|---|\n"
+
+    for name, failure in failures.items():
+        section += f"| {name} | {failure} |\n"
+
+    return section
+
+
 def generate_markdown(
-    name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize
+    name: str,
+    chart_data: dict[str, list[Result]],
+    failures: dict[str, str],
+    markdown_size: MarkdownSize,
 ):
-    (summary_line, summary_table) = generate_summary_table_and_chart(
+    (summary_line, summary_table) = generate_summary_table(
         chart_data, name, markdown_size
     )
 
@@ -396,4 +402,6 @@ def generate_markdown(
         )
         generated_markdown += "\n# Details\n" f"{markdown_details}\n"
 
-    return generated_markdown
+    failures_section = generate_failures_section(failures)
+
+    return failures_section + generated_markdown
diff --git a/devops/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
similarity index 79%
rename from devops/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/utils/oneapi.py
index 0547f6646e39e..e1876b5ed37fb 100644
--- a/devops/scripts/benchmarks/benches/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -7,29 +7,33 @@
 from utils.utils import download, run
 from options import options
 import os
+import hashlib
 
 
 class OneAPI:
-    # random unique number for benchmark oneAPI installation
-    ONEAPI_BENCHMARK_INSTANCE_ID = 987654
-
     def __init__(self):
         self.oneapi_dir = os.path.join(options.workdir, "oneapi")
         Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
-        # delete if some option is set?
+        self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
 
         # can we just hardcode these links?
         self.install_package(
             "dnnl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
         )
         self.install_package(
             "mkl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
         )
         return
 
-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
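+        # Derive a stable installer --instance id from the install path so each
+        # oneAPI directory gets its own instance; md5 is used only as an id here,
+        # not for any security purpose.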
+        hash_object = hashlib.md5(path.encode())
+        return hash_object.hexdigest()
+
+    def install_package(self, name, url, checksum):
         package_path = os.path.join(self.oneapi_dir, name)
         if Path(package_path).exists():
             print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
             )
             return
 
-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
         try:
             print(f"installing {name}")
             run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
             )
         except:
             print("oneAPI installation likely exists already")
diff --git a/devops/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/utils/result.py
similarity index 69%
rename from devops/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/utils/result.py
index 52a098d91c24a..4e65a3b8aa582 100644
--- a/devops/scripts/benchmarks/benches/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -3,9 +3,9 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional
-from dataclasses_json import dataclass_json
+from dataclasses_json import config, dataclass_json
 from datetime import datetime
 
 
@@ -14,8 +14,8 @@
 class Result:
     label: str
     value: float
-    command: str
-    env: str
+    command: list[str]
+    env: dict[str, str]
     stdout: str
     passed: bool = True
     unit: str = ""
@@ -26,9 +26,8 @@ class Result:
     # values below should not be set by the benchmark
     name: str = ""
     lower_is_better: bool = True
-    git_hash: str = ""
-    date: Optional[datetime] = None
     suite: str = "Unknown"
+    description: str = "No description provided."
 
 
 @dataclass_json
@@ -37,4 +36,8 @@ class BenchmarkRun:
     results: list[Result]
     name: str = "This PR"
     git_hash: str = ""
-    date: datetime = None
+    github_repo: str = None
+    date: datetime = field(
+        default=None,
+        metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
+    )
diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
index d3d88f417cb8b..ba26127ce37b9 100644
--- a/devops/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -12,6 +12,7 @@
 import urllib  # nosec B404
 from options import options
 from pathlib import Path
+import hashlib
 
 
 def run(
@@ -42,6 +43,12 @@ def run(
 
         env.update(env_vars)
 
+        if options.verbose:
+            command_str = " ".join(command)
+            env_str = " ".join(f"{key}={value}" for key, value in env_vars.items())
+            full_command_str = f"{env_str} {command_str}".strip()
+            print(f"Running: {full_command_str}")
+
         result = subprocess.run(
             command,
             cwd=cwd,
@@ -104,7 +111,7 @@ def prepare_workdir(dir, version):
                 shutil.rmtree(dir)
         else:
             raise Exception(
-                f"The directory {dir} exists but is a benchmark work directory."
+                f"The directory {dir} exists but is not a benchmark work directory."
             )
 
     os.makedirs(dir)
@@ -125,11 +132,26 @@ def create_build_path(directory, name):
     return build_path
 
 
-def download(dir, url, file, untar=False, unzip=False):
+def calculate_checksum(file_path):
+    sha_hash = hashlib.sha384()
+    with open(file_path, "rb") as f:
+        for byte_block in iter(lambda: f.read(4096), b""):
+            sha_hash.update(byte_block)
+    return sha_hash.hexdigest()
+
+
+def download(dir, url, file, untar=False, unzip=False, checksum=""):
     data_file = os.path.join(dir, file)
     if not Path(data_file).exists():
         print(f"{data_file} does not exist, downloading")
         urllib.request.urlretrieve(url, data_file)
+        calculated_checksum = calculate_checksum(data_file)
+        if calculated_checksum != checksum:
+            print(
+                f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue."
+            )
+            exit(1)
+
         if untar:
             file = tarfile.open(data_file)
             file.extractall(dir)
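
The checksum verification added in this patch is self-contained enough to illustrate on its own. Below is a minimal sketch of the same sha384 flow used by calculate_checksum() and download(); the file name and expected digest in the usage comment are placeholders, not values taken from the patch.

    import hashlib

    def calculate_checksum(file_path: str) -> str:
        # Stream the file in 4 KiB blocks so large offline installers need not fit in memory.
        sha_hash = hashlib.sha384()
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha_hash.update(byte_block)
        return sha_hash.hexdigest()

    def verify_download(path: str, expected_checksum: str) -> None:
        # Mirrors the check in download(): refuse to continue on a mismatch.
        actual = calculate_checksum(path)
        if actual != expected_checksum:
            raise SystemExit(
                f"Checksum mismatch: expected {expected_checksum}, got {actual}. Refusing to continue."
            )

    # Hypothetical usage (placeholder path and digest):
    # verify_download("package_dnnl.sh", "6866feb5...")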

From 3cbed5e3391366f16b6ff11d2a0d2e7a68511b58 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Wed, 5 Mar 2025 13:51:42 -0800
Subject: [PATCH 05/79] Test UR benchmarking suite

---
 devops/actions/run-tests/benchmark/action.yml | 46 ++++++++++---------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 69631d044891c..9846b5c8bd6c6 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -47,6 +47,7 @@ runs:
           echo "#" ;;
       esac
   - name: Compute CPU core range to run benchmarks on
+    shell: bash
     run: |
       # Taken from ur-benchmark-reusable.yml:
 
@@ -89,27 +90,30 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
-  - name: Push compute-benchmarks results
-    if: always()
-    shell: bash
-    run: |
-      # TODO -- waiting on security clearance
-      # Load configuration values
-      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
-
-      cd "./llvm-ci-perf-results"
-      git config user.name "SYCL Benchmarking Bot"
-      git config user.email "sys_sycl_benchmarks@intel.com"
-      git pull
-      git add .
-      # Make sure changes have been made
-      if git diff --quiet && git diff --cached --quiet; then
-        echo "No new results added, skipping push."
-      else
-        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
-      fi
+      mkdir -v ./llvm_test_workdir
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
+      echo "-----"
+      ls
+#  - name: Push compute-benchmarks results
+#    if: always()
+#    shell: bash
+#    run: |
+#      # TODO -- waiting on security clearance
+#      # Load configuration values
+#      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+#
+#      cd "./llvm-ci-perf-results"
+#      git config user.name "SYCL Benchmarking Bot"
+#      git config user.email "sys_sycl_benchmarks@intel.com"
+#      git pull
+#      git add .
+#      # Make sure changes have been made
+#      if git diff --quiet && git diff --cached --quiet; then
+#        echo "No new results added, skipping push."
+#      else
+#        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
+#        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
+#      fi
   - name: Find benchmark result artifact here
     if: always()
     shell: bash

From f79bbbfefe01c64963286c5aed5f84848b755200 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Wed, 5 Mar 2025 14:49:57 -0800
Subject: [PATCH 06/79] Bump tolerance to 7%

---
 devops/benchmarking/config.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini
index c0b3ca9c31c9e..6b0ecc21f940f 100644
--- a/devops/benchmarking/config.ini
+++ b/devops/benchmarking/config.ini
@@ -23,7 +23,7 @@ recorded = Median,StdDev
 ; the historical average. Metrics not included here are not compared against
 ; when passing/failing benchmark results.
 ; Format: comma-separated list of <metric>:<deviation percentage in decimals>
-tolerances = Median:0.5
+tolerances = Median:0.7
 
 ; Options for computing historical averages
 [average]

From ffc813919aa9f165b040fa11742d5bd909befabe Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Wed, 5 Mar 2025 14:50:30 -0800
Subject: [PATCH 07/79] Revert "Bump tolerance to 7%"

This reverts commit f79bbbfefe01c64963286c5aed5f84848b755200.
---
 devops/benchmarking/config.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini
index 6b0ecc21f940f..c0b3ca9c31c9e 100644
--- a/devops/benchmarking/config.ini
+++ b/devops/benchmarking/config.ini
@@ -23,7 +23,7 @@ recorded = Median,StdDev
 ; the historical average. Metrics not included here are not compared against
 ; when passing/failing benchmark results.
 ; Format: comma-separated list of <metric>:<deviation percentage in decimals>
-tolerances = Median:0.7
+tolerances = Median:0.5
 
 ; Options for computing historical averages
 [average]

From 0a34e0d0914de06e0a086cbdcd44d0f1589447e2 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 6 Mar 2025 12:20:13 +0000
Subject: [PATCH 08/79] [benchmarks] fix failing benchmarks, improve html
 output

... and add presets to more easily select which benchmark suites to run
---
 devops/scripts/benchmarks/benches/compute.py  |  2 +-
 devops/scripts/benchmarks/benches/llamacpp.py |  2 +-
 .../scripts/benchmarks/benches/syclbench.py   | 13 ++--
 devops/scripts/benchmarks/benches/velocity.py |  2 +-
 devops/scripts/benchmarks/history.py          |  5 +-
 devops/scripts/benchmarks/html/config.js      |  7 +--
 devops/scripts/benchmarks/html/scripts.js     | 46 ++++++++------
 devops/scripts/benchmarks/main.py             | 38 ++++++-----
 devops/scripts/benchmarks/options.py          |  7 +--
 devops/scripts/benchmarks/output_html.py      | 63 +++++++++++--------
 devops/scripts/benchmarks/output_markdown.py  |  4 +-
 devops/scripts/benchmarks/presets.py          | 50 +++++++++++++++
 12 files changed, 153 insertions(+), 86 deletions(-)
 create mode 100644 devops/scripts/benchmarks/presets.py

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 18ed969728902..d35a8e2791648 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -170,7 +170,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
-                    description=self.description()
+                    description=self.description(),
                 )
             )
         return ret
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index d8e0ab5d007bb..c12f811942849 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -139,7 +139,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
-                    description=self.description()
+                    description=self.description(),
                 )
             )
         return results
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index 47326b2555a68..cc2db0a2fcf7c 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -105,7 +105,6 @@ def __init__(self, bench, name, test):
         self.bench = bench
         self.bench_name = name
         self.test = test
-        self.done = False
 
     def bin_args(self) -> list[str]:
         return []
@@ -119,8 +118,6 @@ def setup(self):
         )
 
     def run(self, env_vars) -> list[Result]:
-        if self.done:
-            return
         self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
 
         command = [
@@ -152,17 +149,17 @@ def run(self, env_vars) -> list[Result]:
                             unit="ms",
                         )
                     )
-        self.done = True
-        return res_list
 
-    def teardown(self):
-        print(f"Removing {self.outputfile}...")
         os.remove(self.outputfile)
-        return
+
+        return res_list
 
     def name(self):
         return f"{self.bench.name()} {self.test}"
 
+    def teardown(self):
+        return
+
 
 # multi benchmarks
 class Blocked_transform(SyclBenchmark):
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index be36c47ca36d5..652a831d0222e 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -136,7 +136,7 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
-                description=self.description()
+                description=self.description(),
             )
         ]
 
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 2bb0b9db8ea38..2b7002ed7faa9 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -61,11 +61,12 @@ def extract_index(file_path: Path) -> int:
 
     def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
-            result = run("git rev-parse --short HEAD")
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            result = run("git rev-parse --short HEAD", cwd=script_dir)
             git_hash = result.stdout.decode().strip()
 
             # Get the GitHub repo URL from git remote
-            remote_result = run("git remote get-url origin")
+            remote_result = run("git remote get-url origin", cwd=script_dir)
             remote_url = remote_result.stdout.decode().strip()
 
             # Convert SSH or HTTPS URL to owner/repo format
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
index c1210b2b21da5..3e67ae1dce8e5 100644
--- a/devops/scripts/benchmarks/html/config.js
+++ b/devops/scripts/benchmarks/html/config.js
@@ -1,5 +1,2 @@
-const config = {
-    remoteDataUrl: ''
-};
-// defaultCompareNames = [];
-// suiteNames = [];
+//remoteDataUrl = 'https://example.com/data.json';
+//defaultCompareNames = ['baseline'];
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 8f0272048136d..7b8b4d742cca2 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -114,14 +114,12 @@ function createChart(data, containerId, type) {
 
     const chartConfig = {
         type: type === 'time' ? 'line' : 'bar',
-        data: type === 'time' ?
-            {
-                datasets: createTimeseriesDatasets(data)
-            } :
-            {
-                labels: data.labels,
-                datasets: data.datasets
-            },
+        data: type === 'time' ? {
+            datasets: createTimeseriesDatasets(data)
+        } : {
+            labels: data.labels,
+            datasets: data.datasets
+        },
         options: options
     };
 
@@ -221,10 +219,12 @@ function createChartContainer(data, canvasId) {
     summary.appendChild(downloadButton);
     details.appendChild(summary);
 
+    latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
+
     // Create and append extra info
     const extraInfo = document.createElement('div');
     extraInfo.className = 'extra-info';
-    extraInfo.innerHTML = generateExtraInfo(data);
+    extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
     details.appendChild(extraInfo);
 
     container.appendChild(details);
@@ -252,9 +252,8 @@ function createLatestRunsLookup(benchmarkRuns) {
 
     return latestRunsMap;
 }
-const latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
 
-function generateExtraInfo(data) {
+function generateExtraInfo(latestRunsLookup, data) {
     const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
 
     return labels.map(label => {
@@ -283,7 +282,7 @@ function downloadChart(canvasId, label) {
     const chart = chartInstances.get(canvasId);
     if (chart) {
         const link = document.createElement('a');
-        link.href = chart.toBase64Image('image/jpeg', 1)
+        link.href = chart.toBase64Image('image/png', 1)
         link.download = `${label}.png`;
         link.click();
     }
@@ -445,6 +444,13 @@ function setupRunSelector() {
 function setupSuiteFilters() {
     suiteFiltersContainer = document.getElementById('suite-filters');
 
+    const suiteNames = new Set();
+    benchmarkRuns.forEach(run => {
+        run.results.forEach(result => {
+            suiteNames.add(result.suite);
+        });
+    });
+
     suiteNames.forEach(suite => {
         const label = document.createElement('label');
         const checkbox = document.createElement('input');
@@ -530,16 +536,18 @@ function loadData() {
     const loadingIndicator = document.getElementById('loading-indicator');
     loadingIndicator.style.display = 'block'; // Show loading indicator
 
-    if (config.remoteDataUrl && config.remoteDataUrl !== '') {
+    if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
         // Fetch data from remote URL
-        fetch(config.remoteDataUrl)
-            .then(response => response.text())
-            .then(scriptContent => {
-                // Evaluate the script content
-                eval(scriptContent);
+        fetch(remoteDataUrl)
+            .then(response => response.json())
+            .then(data => {
+                benchmarkRuns = data;
                 initializeCharts();
             })
-            .catch(error => console.error('Error fetching remote data:', error))
+            .catch(error => {
+                console.error('Error fetching remote data:', error);
+                loadingIndicator.textContent = 'Fetching remote data failed.';
+            })
             .finally(() => {
                 loadingIndicator.style.display = 'none'; // Hide loading indicator
             });
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 8f5330d7b4f62..716f162c48feb 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,6 +17,7 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
+from presets import Presets
 
 import argparse
 import re
@@ -153,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             SyclBench(directory),
             LlamaCppBench(directory),
             UMFSuite(directory),
-            # TestSuite()
+            TestSuite()
         ]
         if not options.dry_run
         else []
@@ -163,6 +164,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     failures = {}
 
     for s in suites:
+        if s.name() not in options.preset.suites():
+            continue
+
         suite_benchmarks = s.benchmarks()
         if filter:
             suite_benchmarks = [
@@ -182,14 +186,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
                 print(f"{type(s).__name__} setup complete.")
                 benchmarks += suite_benchmarks
 
-    for b in benchmarks:
-        print(b.name())
-
     for benchmark in benchmarks:
         try:
-            print(f"Setting up {benchmark.name()}... ")
+            if options.verbose:
+                print(f"Setting up {benchmark.name()}... ")
             benchmark.setup()
-            print(f"{benchmark.name()} setup complete.")
+            if options.verbose:
+                print(f"{benchmark.name()} setup complete.")
 
         except Exception as e:
             if options.exit_on_failure:
@@ -279,8 +282,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if options.output_html:
         generate_html(history.runs, compare_names)
 
-        print(f"See {os.getcwd()}/html/index.html for the results.")
-
 
 def validate_and_parse_env_args(env_args):
     env_vars = {}
@@ -362,12 +363,6 @@ def validate_and_parse_env_args(env_args):
         help="Regex pattern to filter benchmarks by name.",
         default=None,
     )
-    parser.add_argument(
-        "--epsilon",
-        type=float,
-        help="Threshold to consider change of performance significant",
-        default=options.epsilon,
-    )
     parser.add_argument(
         "--verbose", help="Print output of all the commands.", action="store_true"
     )
@@ -394,7 +389,11 @@ def validate_and_parse_env_args(env_args):
         help="Specify whether markdown output should fit the content size limit for request validation",
     )
     parser.add_argument(
-        "--output-html", help="Create HTML output", action="store_true", default=False
+        "--output-html",
+        help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.",
+        nargs="?",
+        const=options.output_html,
+        choices=["local", "remote"],
     )
     parser.add_argument(
         "--dry-run",
@@ -438,6 +437,13 @@ def validate_and_parse_env_args(env_args):
         help="Directory for cublas library",
         default=None,
     )
+    parser.add_argument(
+        "--preset",
+        type=str,
+        choices=[p.name for p in Presets],
+        help="Benchmark preset to run.",
+        default='FULL',
+    )
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -449,7 +455,6 @@ def validate_and_parse_env_args(env_args):
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
-    options.epsilon = args.epsilon
     options.ur = args.ur
     options.ur_adapter = args.adapter
     options.exit_on_failure = args.exit_on_failure
@@ -464,6 +469,7 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
+    options.preset = Presets[args.preset].value()
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 206ca94eb0d0b..fd08ce83d145e 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
-
+from presets import Preset
 
 class Compare(Enum):
     LATEST = "latest"
@@ -29,11 +29,9 @@ class Options:
     compare: Compare = Compare.LATEST
     compare_max: int = 10  # average/median over how many results
     output_markdown: MarkdownSize = MarkdownSize.SHORT
-    output_html: bool = False
+    output_html: str = "local"
     dry_run: bool = False
-    # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
-    epsilon: float = 0.02
     iterations_stddev: int = 5
     build_compute_runtime: bool = False
     extra_ld_libraries: list[str] = field(default_factory=list)
@@ -41,6 +39,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.12"
     build_igc: bool = False
     current_run_name: str = "This PR"
+    preset: Preset = None
 
 
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index e6e3212dbcdb2..d84fd659beb6c 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -5,34 +5,43 @@
 
 import json
 import os
+from options import options
 
 
 def generate_html(benchmark_runs: list, compare_names: list[str]):
-
-    # Get unique suite names
-    suite_names = {result.suite for run in benchmark_runs for result in run.results}
-
     # create path to data.js in html folder
-    data_path = os.path.join(os.path.dirname(__file__), "html", "data.js")
-
-    # Write data to js file
-    # We can't store this as a standalone json file because it needs to be inline in the html
-    with open(data_path, "w") as f:
-        f.write("const benchmarkRuns = [\n")
-        # it might be tempting to just to create a list and convert
-        # that to a json, but that leads to json being serialized twice.
-        for i, run in enumerate(benchmark_runs):
-            if i > 0:
-                f.write(",\n")
-            f.write(run.to_json())
-
-        f.write("\n];\n\n")  # terminates benchmarkRuns
-
-        # these are not const because they might be modified
-        # in config.js
-        f.write("defaultCompareNames = ")
-        json.dump(compare_names, f)
-        f.write(";\n\n")  # terminates defaultCompareNames
-        f.write("suiteNames = ")
-        json.dump(list(suite_names), f)
-        f.write(";")  # terminates suiteNames
+    html_path = os.path.join(os.path.dirname(__file__), "html")
+
+    if options.output_html == "local":
+        data_path = os.path.join(html_path, "data.js")
+        # Write data to js file
+        # We can't store this as a standalone json file because it needs to be inline in the html
+        with open(data_path, "w") as f:
+            f.write("benchmarkRuns = [\n")
+            # it might be tempting to just to create a list and convert
+            # that to a json, but that leads to json being serialized twice.
+            for i, run in enumerate(benchmark_runs):
+                if i > 0:
+                    f.write(",\n")
+                f.write(run.to_json())
+
+            f.write("\n];\n\n")  # terminates benchmarkRuns
+
+            f.write("defaultCompareNames = ")
+            json.dump(compare_names, f)
+            f.write(";\n")  # terminates defaultCompareNames
+
+        print(f"See {os.getcwd()}/html/index.html for the results.")
+    else:
+        data_path = os.path.join(html_path, "data.json")
+        with open(data_path, "w") as f:
+            f.write("[\n")
+            for i, run in enumerate(benchmark_runs):
+                if i > 0:
+                    f.write(",\n")
+                f.write(run.to_json())
+            f.write("\n];\n")
+
+        print(
+            f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
+        )
diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
index 18b5779473a75..3295968603d0c 100644
--- a/devops/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int):
         "\n<details>\n"
         "<summary>\n"
         f"{title} {rows_count} "
-        f"(threshold {options.epsilon*100:.2f}%)\n"
+        f"(threshold {options.stddev_threshold*100:.2f}%)\n"
         "</summary>\n\n"
     )
 
@@ -265,7 +265,7 @@ def generate_summary_table(
                 delta = oln.diff - 1
                 oln.row += f" {delta*100:.2f}%"
 
-                if abs(delta) > options.epsilon:
+                if abs(delta) > options.stddev_threshold:
                     if delta > 0:
                         improved_rows.append(oln.row + " | \n")
                     else:
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
new file mode 100644
index 0000000000000..46f8257cd01ec
--- /dev/null
+++ b/devops/scripts/benchmarks/presets.py
@@ -0,0 +1,50 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from enum import Enum
+
+class Preset():
+    def description(self):
+        pass
+    def suites(self) -> list[str]:
+        return []
+
+class Full(Preset):
+    def description(self):
+        return "All available benchmarks."
+    def suites(self) -> list[str]:
+        return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF']
+
+class SYCL(Preset):
+    def description(self):
+        return "All available benchmarks related to SYCL."
+    def suites(self) -> list[str]:
+        return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench']
+
+class Minimal(Preset):
+    def description(self):
+        return "Short microbenchmarks."
+    def suites(self) -> list[str]:
+        return ['Compute Benchmarks']
+
+class Normal(Preset):
+    def description(self):
+        return "Comprehensive mix of microbenchmarks and real applications."
+    def suites(self) -> list[str]:
+        return ['Compute Benchmarks']
+
+class Test(Preset):
+    def description(self):
+        return "Noop benchmarks for framework testing."
+    def suites(self) -> list[str]:
+        return ['Test Suite']
+
+
+class Presets(Enum):
+    FULL = Full
+    SYCL = SYCL # Nightly
+    NORMAL = Normal # PR
+    MINIMAL = Minimal # Quick smoke tests
+    TEST = Test
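
The presets added here are effectively an allow-list of suite names that main.py checks before setting up each suite. A minimal, self-contained sketch of that selection logic follows; FakeSuite is a hypothetical stand-in for the real suite classes (ComputeBench, SyclBench, and so on).

    class Minimal:
        def suites(self) -> list[str]:
            return ["Compute Benchmarks"]

    class FakeSuite:
        # Hypothetical stand-in for a real benchmark suite class.
        def __init__(self, name: str):
            self._name = name

        def name(self) -> str:
            return self._name

    preset = Minimal()
    all_suites = [FakeSuite("Compute Benchmarks"), FakeSuite("SYCL-Bench"), FakeSuite("UMF")]

    # Same filter as the loop added to main.py: skip suites the preset does not name.
    selected = [s for s in all_suites if s.name() in preset.suites()]
    print([s.name() for s in selected])  # ['Compute Benchmarks']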

From 3f42420d95522557ff09c45aa5db480d1f636eda Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 6 Mar 2025 13:47:30 +0000
Subject: [PATCH 09/79] [benchmarks] fix python formatting with black

---
 devops/scripts/benchmarks/main.py    |  4 ++--
 devops/scripts/benchmarks/options.py |  1 +
 devops/scripts/benchmarks/presets.py | 36 +++++++++++++++++++++-------
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 716f162c48feb..4df66d7ad9c4c 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -154,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             SyclBench(directory),
             LlamaCppBench(directory),
             UMFSuite(directory),
-            TestSuite()
+            TestSuite(),
         ]
         if not options.dry_run
         else []
@@ -442,7 +442,7 @@ def validate_and_parse_env_args(env_args):
         type=str,
         choices=[p.name for p in Presets],
         help="Benchmark preset to run.",
-        default='FULL',
+        default="FULL",
     )
 
     args = parser.parse_args()
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index fd08ce83d145e..7f4f3a9a32eb3 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -2,6 +2,7 @@
 from enum import Enum
 from presets import Preset
 
+
 class Compare(Enum):
     LATEST = "latest"
     AVERAGE = "average"
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index 46f8257cd01ec..4db68a925a54e 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -5,46 +5,64 @@
 
 from enum import Enum
 
-class Preset():
+
+class Preset:
     def description(self):
         pass
+
     def suites(self) -> list[str]:
         return []
 
+
 class Full(Preset):
     def description(self):
         return "All available benchmarks."
+
     def suites(self) -> list[str]:
-        return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF']
+        return [
+            "Compute Benchmarks",
+            "llama.cpp bench",
+            "SYCL-Bench",
+            "Velocity Bench",
+            "UMF",
+        ]
+
 
 class SYCL(Preset):
     def description(self):
         return "All available benchmarks related to SYCL."
+
     def suites(self) -> list[str]:
-        return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench']
+        return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"]
+
 
 class Minimal(Preset):
     def description(self):
         return "Short microbenchmarks."
+
     def suites(self) -> list[str]:
-        return ['Compute Benchmarks']
+        return ["Compute Benchmarks"]
+
 
 class Normal(Preset):
     def description(self):
         return "Comprehensive mix of microbenchmarks and real applications."
+
     def suites(self) -> list[str]:
-        return ['Compute Benchmarks']
+        return ["Compute Benchmarks"]
+
 
 class Test(Preset):
     def description(self):
         return "Noop benchmarks for framework testing."
+
     def suites(self) -> list[str]:
-        return ['Test Suite']
+        return ["Test Suite"]
 
 
 class Presets(Enum):
     FULL = Full
-    SYCL = SYCL # Nightly
-    NORMAL = Normal # PR
-    MINIMAL = Minimal # Quick smoke tests
+    SYCL = SYCL  # Nightly
+    NORMAL = Normal  # PR
+    MINIMAL = Minimal  # Quick smoke tests
     TEST = Test

From 1c7b189db0c8a8d2883ced52ac3e2b45840c792d Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 6 Mar 2025 14:35:11 +0000
Subject: [PATCH 10/79] update driver version

---
 devops/scripts/benchmarks/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 7f4f3a9a32eb3..aba5aac434917 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -37,7 +37,7 @@ class Options:
     build_compute_runtime: bool = False
     extra_ld_libraries: list[str] = field(default_factory=list)
     extra_env_vars: dict = field(default_factory=dict)
-    compute_runtime_tag: str = "25.05.32567.12"
+    compute_runtime_tag: str = "25.05.32567.18"
     build_igc: bool = False
     current_run_name: str = "This PR"
     preset: Preset = None

From ad13e93adf8cabd17a7f384f68d509fdbc58a134 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 6 Mar 2025 15:02:42 +0000
Subject: [PATCH 11/79] simplify preset implementation and fix normal preset

---
 devops/scripts/benchmarks/main.py    |  8 ++---
 devops/scripts/benchmarks/options.py |  4 +--
 devops/scripts/benchmarks/presets.py | 51 +++++++++++++---------------
 3 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 4df66d7ad9c4c..11f02d627a87f 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,7 +17,7 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
-from presets import Presets
+from presets import preset_get_by_name, presets
 
 import argparse
 import re
@@ -440,9 +440,9 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument(
         "--preset",
         type=str,
-        choices=[p.name for p in Presets],
+        choices=[p.name() for p in presets],
         help="Benchmark preset to run.",
-        default="FULL",
+        default=options.preset.name(),
     )
 
     args = parser.parse_args()
@@ -469,7 +469,7 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
-    options.preset = Presets[args.preset].value()
+    options.preset = preset_get_by_name(args.preset)
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index aba5aac434917..428ab1f13e9af 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
-from presets import Preset
+from presets import Preset, presets
 
 
 class Compare(Enum):
@@ -40,7 +40,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.18"
     build_igc: bool = False
     current_run_name: str = "This PR"
-    preset: Preset = None
+    preset: Preset = presets[0]
 
 
 options = Options()
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index 4db68a925a54e..54727446ecc7d 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -3,22 +3,23 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from enum import Enum
-
+from typing import List, Type
 
 class Preset:
-    def description(self):
-        pass
+    def description(self) -> str:
+        raise NotImplementedError
 
-    def suites(self) -> list[str]:
-        return []
+    def name(self) -> str:
+        return self.__class__.__name__
 
+    def suites(self) -> List[str]:
+        raise NotImplementedError
 
 class Full(Preset):
-    def description(self):
+    def description(self) -> str:
         return "All available benchmarks."
 
-    def suites(self) -> list[str]:
+    def suites(self) -> List[str]:
         return [
             "Compute Benchmarks",
             "llama.cpp bench",
@@ -27,42 +28,38 @@ def suites(self) -> list[str]:
             "UMF",
         ]
 
-
 class SYCL(Preset):
-    def description(self):
+    def description(self) -> str:
         return "All available benchmarks related to SYCL."
 
-    def suites(self) -> list[str]:
+    def suites(self) -> List[str]:
         return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"]
 
-
 class Minimal(Preset):
-    def description(self):
+    def description(self) -> str:
         return "Short microbenchmarks."
 
-    def suites(self) -> list[str]:
+    def suites(self) -> List[str]:
         return ["Compute Benchmarks"]
 
-
 class Normal(Preset):
-    def description(self):
+    def description(self) -> str:
         return "Comprehensive mix of microbenchmarks and real applications."
 
-    def suites(self) -> list[str]:
-        return ["Compute Benchmarks"]
-
+    def suites(self) -> List[str]:
+        return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"]
 
 class Test(Preset):
-    def description(self):
+    def description(self) -> str:
         return "Noop benchmarks for framework testing."
 
-    def suites(self) -> list[str]:
+    def suites(self) -> List[str]:
         return ["Test Suite"]
 
+presets = [Full(), SYCL(), Minimal(), Normal(), Test()]
 
-class Presets(Enum):
-    FULL = Full
-    SYCL = SYCL  # Nightly
-    NORMAL = Normal  # PR
-    MINIMAL = Minimal  # Quick smoke tests
-    TEST = Test
+def preset_get_by_name(name: str) -> Preset:
+    for p in presets:
+        if p.name().upper() == name.upper():
+            return p
+    raise ValueError(f"Preset '{name}' not found.")
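
With presets now plain instances instead of an Enum, both the CLI choices and the default derive from the presets list via name(). The sketch below shows the lookup in isolation, with a hypothetical argparse wiring that mirrors how main.py consumes it after this change.

    import argparse

    class Preset:
        def name(self) -> str:
            return self.__class__.__name__

    class Full(Preset): ...
    class Minimal(Preset): ...

    presets = [Full(), Minimal()]

    def preset_get_by_name(name: str) -> Preset:
        for p in presets:
            if p.name().upper() == name.upper():
                return p
        raise ValueError(f"Preset '{name}' not found.")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--preset",
        choices=[p.name() for p in presets],
        default=presets[0].name(),  # mirrors options.preset = presets[0]
    )
    args = parser.parse_args(["--preset", "Minimal"])
    print(preset_get_by_name(args.preset).name())  # Minimal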

From 68ed0c4e6bcf1a06bd924e0d96731e52513ae1eb Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 6 Mar 2025 14:44:52 -0800
Subject: [PATCH 12/79] Add PVC and BMG as runners

---
 .github/workflows/sycl-linux-run-tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index 9bad484b1c12e..5797755934a0c 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -126,6 +126,8 @@ on:
           - '["cts-cpu"]'
           - '["Linux", "build"]'
           - '["cuda"]'
+          - '["Linux", "bmg"]'
+          - '["PVC_PERF"]'
       image:
         type: choice
         options:

From 3a65b98b4558c56f9aeca5d1b33393715764c361 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 6 Mar 2025 14:49:20 -0800
Subject: [PATCH 13/79] Install dependencies before running UR script

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 9846b5c8bd6c6..41f9e68f3609d 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,7 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      mkdir -v ./llvm_test_workdir
+      pip install -r ./devops/scripts/benchmarks/requirements.txt
       taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
       echo "-----"
       ls

From 220121aa4229bb8d2e6517bb84112b48fa14317b Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 6 Mar 2025 14:58:52 -0800
Subject: [PATCH 14/79] Use venv for python packages

---
 devops/actions/run-tests/benchmark/action.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 41f9e68f3609d..afd5ede276228 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,6 +90,8 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
+      python3 -m venv .venv
+      . .venv/bin/activate
       pip install -r ./devops/scripts/benchmarks/requirements.txt
       taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
       echo "-----"

From 37d361cac120e662c1905acd22542014ac1ac73c Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 6 Mar 2025 15:01:21 -0800
Subject: [PATCH 15/79] Install venv before using venv

---
 devops/actions/run-tests/benchmark/action.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index afd5ede276228..88f2e75942c4d 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,6 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
+      apt install -y python3-venv
       python3 -m venv .venv
       . .venv/bin/activate
       pip install -r ./devops/scripts/benchmarks/requirements.txt

From 07f1e107a78f84e320379a5b01e4f92b159964cb Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Fri, 7 Mar 2025 12:07:52 +0000
Subject: [PATCH 16/79] [benchmarks] allow specifying custom results
 directories

---
 devops/scripts/benchmarks/html/data.js   |  2 ++
 devops/scripts/benchmarks/main.py        | 12 +++++++++++-
 devops/scripts/benchmarks/options.py     |  1 +
 devops/scripts/benchmarks/output_html.py |  2 +-
 devops/scripts/benchmarks/presets.py     |  2 +-
 5 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 devops/scripts/benchmarks/html/data.js

diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
new file mode 100644
index 0000000000000..36e076361fe17
--- /dev/null
+++ b/devops/scripts/benchmarks/html/data.js
@@ -0,0 +1,2 @@
+benchmarkRuns = [];
+defaultCompareNames = [];
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 11f02d627a87f..43e0bdf4832b1 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -242,7 +242,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    history = BenchmarkHistory(directory)
+    results_dir = directory
+    if options.custom_results_dir:
+        results_dir = Path(options.custom_results_dir)
+    history = BenchmarkHistory(results_dir)
     # limit how many files we load.
     # should this be configurable?
     history.load(1000)
@@ -444,6 +447,12 @@ def validate_and_parse_env_args(env_args):
         help="Benchmark preset to run.",
         default=options.preset.name(),
     )
+    parser.add_argument(
+        "--results-dir",
+        type=str,
+        help="Specify a custom results directory",
+        default=options.custom_results_dir,
+    )
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -470,6 +479,7 @@ def validate_and_parse_env_args(env_args):
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
     options.preset = preset_get_by_name(args.preset)
+    options.custom_results_dir = args.results_dir
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 428ab1f13e9af..c567a4a2bda53 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -41,6 +41,7 @@ class Options:
     build_igc: bool = False
     current_run_name: str = "This PR"
     preset: Preset = presets[0]
+    custom_results_dir = None
 
 
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index d84fd659beb6c..35fbc2ffb122a 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -40,7 +40,7 @@ def generate_html(benchmark_runs: list, compare_names: list[str]):
                 if i > 0:
                     f.write(",\n")
                 f.write(run.to_json())
-            f.write("\n];\n")
+            f.write("\n]\n")
 
         print(
             f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index 54727446ecc7d..5d8e187ac0115 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -60,6 +60,6 @@ def suites(self) -> List[str]:
 
 def preset_get_by_name(name: str) -> Preset:
     for p in presets:
-        if p.name().upper() == name.upper():
+        if p.name() == name:
             return p
     raise ValueError(f"Preset '{name}' not found.")

From 64cf79cb84e8f4a2bc108a8b93cb264adeef6579 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Fri, 7 Mar 2025 15:17:23 +0000
Subject: [PATCH 17/79] [benchmarks] sort runs by date for html output

---
 devops/scripts/benchmarks/html/data.js   | 16 +++++++++++++++-
 devops/scripts/benchmarks/output_html.py |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
index 36e076361fe17..bd2a4bb9c6f36 100644
--- a/devops/scripts/benchmarks/html/data.js
+++ b/devops/scripts/benchmarks/html/data.js
@@ -1,2 +1,16 @@
-benchmarkRuns = [];
+benchmarkRuns = [
+{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 
2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", 
"stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:27.051913+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.17919680914877, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 24.834845762711534, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", 
"stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"}
+];
+
 defaultCompareNames = [];
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 35fbc2ffb122a..53dd4b1e8f968 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -11,6 +11,7 @@
 def generate_html(benchmark_runs: list, compare_names: list[str]):
     # create path to data.js in html folder
     html_path = os.path.join(os.path.dirname(__file__), "html")
+    benchmark_runs.sort(key=lambda run: run.date, reverse=True)
 
     if options.output_html == "local":
         data_path = os.path.join(html_path, "data.js")

From 6c28d333dadab0eccd40a80f2f84aa50107e3b93 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Mon, 10 Mar 2025 11:21:53 +0000
Subject: [PATCH 18/79] simplify presets, remove suites if all set

---
 devops/scripts/benchmarks/html/scripts.js |  4 +-
 devops/scripts/benchmarks/main.py         | 10 +--
 devops/scripts/benchmarks/options.py      |  4 +-
 devops/scripts/benchmarks/presets.py      | 91 ++++++++---------------
 4 files changed, 41 insertions(+), 68 deletions(-)

diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 7b8b4d742cca2..7ba00738e727a 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -6,6 +6,7 @@
 // Core state
 let activeRuns = new Set(defaultCompareNames);
 let chartInstances = new Map();
+let suiteNames = new Set();
 let timeseriesData, barChartsData, allRunNames;
 
 // DOM Elements
@@ -306,7 +307,7 @@ function updateURL() {
         url.searchParams.delete('regex');
     }
 
-    if (activeSuites.length > 0) {
+    if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) {
         url.searchParams.set('suites', activeSuites.join(','));
     } else {
         url.searchParams.delete('suites');
@@ -444,7 +445,6 @@ function setupRunSelector() {
 function setupSuiteFilters() {
     suiteFiltersContainer = document.getElementById('suite-filters');
 
-    const suiteNames = new Set();
     benchmarkRuns.forEach(run => {
         run.results.forEach(result => {
             suiteNames.add(result.suite);
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 43e0bdf4832b1..91f84917f8698 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,7 +17,7 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
-from presets import preset_get_by_name, presets
+from presets import enabled_suites, presets
 
 import argparse
 import re
@@ -164,7 +164,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     failures = {}
 
     for s in suites:
-        if s.name() not in options.preset.suites():
+        if s.name() not in enabled_suites(options.preset):
             continue
 
         suite_benchmarks = s.benchmarks()
@@ -443,9 +443,9 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument(
         "--preset",
         type=str,
-        choices=[p.name() for p in presets],
+        choices=[p for p in presets.keys()],
         help="Benchmark preset to run.",
-        default=options.preset.name(),
+        default=options.preset,
     )
     parser.add_argument(
         "--results-dir",
@@ -478,7 +478,7 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
-    options.preset = preset_get_by_name(args.preset)
+    options.preset = args.preset
     options.custom_results_dir = args.results_dir
 
     if args.build_igc and args.compute_runtime is None:
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index c567a4a2bda53..7600942acd1e5 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass, field
 from enum import Enum
-from presets import Preset, presets
 
+from presets import presets
 
 class Compare(Enum):
     LATEST = "latest"
@@ -40,7 +40,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.18"
     build_igc: bool = False
     current_run_name: str = "This PR"
-    preset: Preset = presets[0]
+    preset: str = "Full"
     custom_results_dir = None
 
 
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index 5d8e187ac0115..7f5dc8d78460a 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -3,63 +3,36 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from typing import List, Type
+presets: dict[str, list[str]] = {
+    "Full": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+        "UMF",
+    ],
+    "SYCL": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+    ],
+    "Minimal": [
+        "Compute Benchmarks",
+    ],
+    "Normal": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "Velocity Bench",
+    ],
+    "Test": [
+        "Test Suite",
+    ],
+}
+
+def enabled_suites(preset: str) -> list[str]:
+    try:
+        return presets[preset]
+    except KeyError:
+        raise ValueError(f"Preset '{preset}' not found.")
 
-class Preset:
-    def description(self) -> str:
-        raise NotImplementedError
-
-    def name(self) -> str:
-        return self.__class__.__name__
-
-    def suites(self) -> List[str]:
-        raise NotImplementedError
-
-class Full(Preset):
-    def description(self) -> str:
-        return "All available benchmarks."
-
-    def suites(self) -> List[str]:
-        return [
-            "Compute Benchmarks",
-            "llama.cpp bench",
-            "SYCL-Bench",
-            "Velocity Bench",
-            "UMF",
-        ]
-
-class SYCL(Preset):
-    def description(self) -> str:
-        return "All available benchmarks related to SYCL."
-
-    def suites(self) -> List[str]:
-        return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"]
-
-class Minimal(Preset):
-    def description(self) -> str:
-        return "Short microbenchmarks."
-
-    def suites(self) -> List[str]:
-        return ["Compute Benchmarks"]
-
-class Normal(Preset):
-    def description(self) -> str:
-        return "Comprehensive mix of microbenchmarks and real applications."
-
-    def suites(self) -> List[str]:
-        return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"]
-
-class Test(Preset):
-    def description(self) -> str:
-        return "Noop benchmarks for framework testing."
-
-    def suites(self) -> List[str]:
-        return ["Test Suite"]
-
-presets = [Full(), SYCL(), Minimal(), Normal(), Test()]
-
-def preset_get_by_name(name: str) -> Preset:
-    for p in presets:
-        if p.name() == name:
-            return p
-    raise ValueError(f"Preset '{name}' not found.")
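Editor's illustration (not part of the patch): a minimal sketch of how the dict-based presets above are consumed, assuming the new presets.py is on the import path; the loop mirrors the main.py change in this patch, and the suite names are only examples.

    from presets import enabled_suites, presets

    preset = "Minimal"                       # any key of the presets dict
    assert preset in presets
    for suite_name in ["Compute Benchmarks", "UMF"]:
        if suite_name not in enabled_suites(preset):
            continue                         # suite not enabled by this preset
        print(f"running suite: {suite_name}")

An unknown preset name raises ValueError from enabled_suites(), which lines up with the choices= restriction on the --preset argument in main.py.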

From e15b94ffa6a0bc297b437f35f8afa3885befdb57 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Mon, 10 Mar 2025 11:25:23 +0000
Subject: [PATCH 19/79] [benchmarks] use python venv for scripts

---
 .github/workflows/ur-benchmarks-reusable.yml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index 6e8a4ea535d15..e3a754753ecf8 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -80,9 +80,13 @@ jobs:
         git checkout origin/pr/${{ inputs.pr_no }}/merge
         git rev-parse origin/pr/${{ inputs.pr_no }}/merge
 
-    - name: Install pip packages
+    - name: Create virtual environment
+      run: python -m venv .venv
+
+    - name: Activate virtual environment and install pip packages
       run: |
-        pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt
+        source .venv/bin/activate
+        pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt
 
     - name: Configure SYCL
       run: >
@@ -139,6 +143,7 @@ jobs:
       working-directory: ${{ github.workspace }}
       id: benchmarks
       run: >
+        source .venv/bin/activate &&
         taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py
         ~/llvm_bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
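Editor's illustration (not part of the patch): the two new workflow steps above, expressed with Python's stdlib and assuming the same requirements.txt path; note that in the workflow itself each later step must re-run 'source .venv/bin/activate', since every 'run:' step starts in a fresh shell.

    import subprocess
    import venv

    # "python -m venv .venv" followed by "pip install -r requirements.txt" inside it
    venv.EnvBuilder(with_pip=True).create(".venv")
    subprocess.run(
        [".venv/bin/pip", "install", "-r", "devops/scripts/benchmarks/requirements.txt"],
        check=True,
    )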

From 78fd037de376d59a404965817d464edd31bb6890 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 10 Mar 2025 07:53:36 -0700
Subject: [PATCH 20/79] Run apt with sudo

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 88f2e75942c4d..5c343f2ff8e26 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,7 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      apt install -y python3-venv
+      sudo apt install -y python3-venv
       python3 -m venv .venv
       . .venv/bin/activate
       pip install -r ./devops/scripts/benchmarks/requirements.txt

From 82b6e55be0f627a301117da05de16bc2ed723b70 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 10 Mar 2025 08:16:58 -0700
Subject: [PATCH 21/79] Ignore "missing" apt packages in workflow

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 5c343f2ff8e26..87a629dc60fd6 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,7 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      sudo apt install -y python3-venv
+      sudo apt install -y --ignore-missing python3-venv
       python3 -m venv .venv
       . .venv/bin/activate
       pip install -r ./devops/scripts/benchmarks/requirements.txt

From 162cba01ca234ab7645cf59c9d7b82d512870c69 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 10 Mar 2025 09:48:32 -0700
Subject: [PATCH 22/79] Change pip to install to user

---
 devops/actions/run-tests/benchmark/action.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 87a629dc60fd6..34fdf178afe0e 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,10 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      sudo apt install -y --ignore-missing python3-venv
-      python3 -m venv .venv
-      . .venv/bin/activate
-      pip install -r ./devops/scripts/benchmarks/requirements.txt
+      pip install --user -r ./devops/scripts/benchmarks/requirements.txt
       taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
       echo "-----"
       ls

From 848f7410b0e7823eb94d288d549474785a339a30 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 10 Mar 2025 09:51:09 -0700
Subject: [PATCH 23/79] Ignore system controlled python env

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 34fdf178afe0e..79cb2bf4aea5b 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -90,7 +90,7 @@ runs:
       echo "-----"
       sycl-ls
       echo "-----"
-      pip install --user -r ./devops/scripts/benchmarks/requirements.txt
+      pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
       taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
       echo "-----"
       ls

From 918604ebd0a22f51be67055b2eea7c877e84a943 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 10 Mar 2025 13:08:09 -0700
Subject: [PATCH 24/79] [CI] use realpaths when referring to SYCL

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 79cb2bf4aea5b..92c948ffd7168 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -91,7 +91,7 @@ runs:
       sycl-ls
       echo "-----"
       pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline
       echo "-----"
       ls
 #  - name: Push compute-benchmarks results

From 72d873034ae844678090c0f4c7082a2a8d893b99 Mon Sep 17 00:00:00 2001
From: Ian Li <ian.li@intel.com>
Date: Mon, 10 Mar 2025 18:10:26 -0400
Subject: [PATCH 25/79] [CI] use minimal preset when running benchmarks

---
 devops/actions/run-tests/benchmark/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 92c948ffd7168..c10a163261c13 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -91,7 +91,7 @@ runs:
       sycl-ls
       echo "-----"
       pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal
       echo "-----"
       ls
 #  - name: Push compute-benchmarks results

From 066f5a60c102669878188cc76532c0f57e2e55bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= <lukasz.stolarczuk@intel.com>
Date: Wed, 12 Mar 2025 11:33:44 +0100
Subject: [PATCH 26/79] [CI] Allow 2 bench scripts locations (#17394)

On PRs based on main, the scripts location is the "old" one and is not accessible.
Pick the location based on which directory exists. The 'gather info' step is in
a 'weird' location, so handle it by trying to execute the script from both places.
---
 .github/workflows/ur-benchmarks-reusable.yml | 23 +++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index e3a754753ecf8..d7c32edfdfc2a 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -80,13 +80,27 @@ jobs:
         git checkout origin/pr/${{ inputs.pr_no }}/merge
         git rev-parse origin/pr/${{ inputs.pr_no }}/merge
 
+    # TODO: As long as this workflow is not merged into main, we should allow both script locations
+    - name: Establish bench scripts location
+      run: |
+        if [ -d "${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" ]; then
+          echo "Bench scripts are in devops/scripts"
+          echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" >> $GITHUB_ENV
+        elif [ -d "${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" ]; then
+          echo "Bench scripts are in unified-runtime/scripts"
+          echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" >> $GITHUB_ENV
+        else
+          echo "Bench scripts are absent...?"
+          exit 1
+        fi
+
     - name: Create virtual environment
       run: python -m venv .venv
 
     - name: Activate virtual environment and install pip packages
       run: |
         source .venv/bin/activate
-        pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt
+        pip install -r ${BENCH_SCRIPTS_DIR}/requirements.txt
 
     - name: Configure SYCL
       run: >
@@ -144,7 +158,7 @@ jobs:
       id: benchmarks
       run: >
         source .venv/bin/activate &&
-        taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py
+        taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py
         ~/llvm_bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
         --ur ${{ github.workspace }}/ur_install
@@ -198,6 +212,9 @@ jobs:
         path: benchmark_results_${{ inputs.pr_no }}.html
         key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }}
 
+    # TODO: As long as this workflow is not merged into main, we should allow both script locations
     - name: Get information about platform
       if: ${{ always() }}
-      run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh
+      run: |
+        ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh || true
+        ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh || true
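Editor's illustration (not part of the patch): the directory-existence check from the 'Establish bench scripts location' step, re-sketched as a Python helper; the function name is hypothetical and the two candidate paths are the ones the workflow probes.

    import os

    def find_bench_scripts_dir(workspace: str) -> str:
        # Prefer the new devops location, fall back to the old unified-runtime one.
        candidates = [
            os.path.join(workspace, "sycl-repo", "devops", "scripts", "benchmarks"),
            os.path.join(workspace, "sycl-repo", "unified-runtime", "scripts", "benchmarks"),
        ]
        for path in candidates:
            if os.path.isdir(path):
                return path
        raise FileNotFoundError("Bench scripts are absent")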

From 18e5291a405ce1c912d0df3ac02aa5446c099ef8 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Wed, 12 Mar 2025 14:28:08 +0000
Subject: [PATCH 27/79] add ulls compute benchmarks

---
 devops/scripts/benchmarks/benches/compute.py | 62 ++++++++++++++++++++
 devops/scripts/benchmarks/html/data.js       | 15 +----
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index d35a8e2791648..92818cc00fad2 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -81,6 +81,10 @@ def benchmarks(self) -> list[Benchmark]:
             GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
             GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
             GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
+            UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256),
+            UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256),
+            UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1),
+            UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1),
         ]
 
         if options.ur is not None:
@@ -531,3 +535,61 @@ def bin_args(self) -> list[str]:
             "--withCopyOffload=1",
             "--immediateAppendCmdList=0",
         ]
+
+class UllsEmptyKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
+        self.wgc = wgc
+        self.wgs = wgs
+        self.runtime = runtime
+        super().__init__(
+            bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel"
+        )
+
+    def explicit_group(self):
+        return f"EmptyKernel {self.wgc} {self.wgs}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--wgs={self.wgs}",
+            f"--wgc={self.wgs}",
+        ]
+
+class UllsKernelSwitch(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents):
+        self.count = count
+        self.kernelTime = kernelTime
+        self.barrier = barrier
+        self.hostVisible = hostVisible
+        self.ctrBasedEvents = ctrBasedEvents
+        self.runtime = runtime
+        self.ioq = ioq
+        super().__init__(
+            bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch"
+        )
+
+    def explicit_group(self):
+        return f"KernelSwitch {self.count} {self.kernelTime}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=1000",
+            f"--count={self.count}",
+            f"--kernelTime={self.kernelTime}",
+            f"--barrier={self.barrier}",
+            f"--hostVisible={self.hostVisible}",
+            f"--ioq={self.ioq}",
+            f"--ctrBasedEvents={self.ctrBasedEvents}",
+        ]
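Editor's illustration (not part of the patch): a standalone re-sketch of how the constructor parameters of the new ULLS benchmarks translate into command-line flags; the dataclass is an editor's stand-in for the real ComputeBenchmark subclasses, and the values mirror UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1) from benchmarks() above.

    from dataclasses import dataclass

    @dataclass
    class KernelSwitchArgs:
        count: int
        kernelTime: int
        barrier: int
        hostVisible: int
        ioq: int
        ctrBasedEvents: int

        def bin_args(self) -> list[str]:
            return [
                "--iterations=1000",
                f"--count={self.count}",
                f"--kernelTime={self.kernelTime}",
                f"--barrier={self.barrier}",
                f"--hostVisible={self.hostVisible}",
                f"--ioq={self.ioq}",
                f"--ctrBasedEvents={self.ctrBasedEvents}",
            ]

    # Mirrors UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1):
    print(KernelSwitchArgs(8, 200, 0, 0, 1, 1).bin_args())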
diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
index bd2a4bb9c6f36..a5b96c72834ba 100644
--- a/devops/scripts/benchmarks/html/data.js
+++ b/devops/scripts/benchmarks/html/data.js
@@ -1,16 +1,3 @@
-benchmarkRuns = [
-{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 
2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", 
"stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:27.051913+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.17919680914877, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 24.834845762711534, 
"name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, 
"name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": 
"FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"},
-{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": 
"Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", 
"stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"}
-];
+benchmarkRuns = [];
 
 defaultCompareNames = [];
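
For reference, each entry in the benchmarkRuns array removed above has the same shape as the run records the benchmarking scripts emit. A minimal Python sketch of that shape, with field names taken from the JSON entries above and types inferred (the authoritative definitions live in devops/scripts/benchmarks/utils/result.py):

from dataclasses import dataclass
from datetime import datetime

@dataclass
class Result:
    label: str              # e.g. "Memory Bandwidth 1"
    value: float            # measured value, expressed in `unit`
    command: list[str]      # command line that produced the measurement
    env: dict[str, str]
    stdout: str
    passed: bool
    unit: str               # e.g. "ms"
    explicit_group: str     # grouping used when charting results
    stddev: float
    name: str
    lower_is_better: bool
    suite: str
    description: str

@dataclass
class BenchmarkRun:
    results: list[Result]
    name: str               # e.g. "baseline"
    git_hash: str
    github_repo: str
    date: datetime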

From 237750e9dc03ce8534d373c984e9fd8c56a72d4f Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 11 Mar 2025 15:07:14 -0700
Subject: [PATCH 28/79] [CI][Benchmark] Decouple results from existing file
 structure, fetch results from git instead of locally

---
 devops/actions/run-tests/benchmark/action.yml | 112 +++++++++---------
 devops/scripts/benchmarks/main.py             |  23 +++-
 devops/scripts/benchmarks/options.py          |   1 +
 devops/scripts/benchmarks/output_html.py      |   4 +-
 4 files changed, 81 insertions(+), 59 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index c10a163261c13..f90808f730787 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -27,16 +27,25 @@ runs:
     shell: bash
     env:
       TARGET_DEVICE: ${{ inputs.target_devices }}
+      RUNNER_NAME: ${{ runner.name }}
     run: |
       case "$RUNNER_TAG" in
-        '["Linux", "gen12"]' | '["Linux", "pvc"]') ;;
+        '["PVC_PERF"]' ) ;;
         *)
           echo "#"
-          echo "# WARNING: Only gen12/pvc on Linux is fully supported."
+          echo "# WARNING: Only specific tuned runners are fully supported."
           echo "# This workflow is not guaranteed to work with other runners."
           echo "#" ;;
       esac
 
+      # Ensure runner name has nothing injected
+      # TODO: in terms of security, is this overkill?
+      if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
+          echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
+          exit 1
+      fi
+      echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV 
+
       # input.target_devices is not directly used, as this allows code injection
       case "$TARGET_DEVICE" in
         level_zero:*) ;;
@@ -46,11 +55,11 @@ runs:
           echo "# This workflow is not guaranteed to work with other backends."
           echo "#" ;;
       esac
+      echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV 
+
   - name: Compute CPU core range to run benchmarks on
     shell: bash
     run: |
-      # Taken from ur-benchmark-reusable.yml:
-
       # Compute the core range for the first NUMA node; second node is used by
       # UMF. Skip the first 4 cores as the kernel is likely to schedule more
       # work on these.
@@ -67,65 +76,62 @@ runs:
 
       ZE_AFFINITY_MASK=0
       echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+  - name: Checkout results repo
+    shell: bash
+    run: |
+      git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
   - name: Run compute-benchmarks
     shell: bash
     run: |
-      cat << EOF
-      #
-      # NOTE TO DEVELOPERS:
-      #
-
-      Check latter steps of the workflow: This job produces an artifact with:
-        - benchmark results from passing/failing tests
-        - log containing all failing (too slow) benchmarks
-        - log containing all erroring benchmarks
-
-      While this step in the workflow provides debugging output describing this
-      information, it might be easier to inspect the logs from the artifact
-      instead.
-
-      EOF
-      export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}"
+      # TODO generate summary + display helpful message here
       export CMPLR_ROOT=./toolchain
       echo "-----"
       sycl-ls
       echo "-----"
       pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal
+      echo "-----"
+      mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
+        "$(realpath ./llvm_test_workdir)" \
+        --sycl "$(realpath ./toolchain)" \
+        --save baseline \
+        --output-html remote \
+        --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --preset Minimal
       echo "-----"
       ls
-#  - name: Push compute-benchmarks results
-#    if: always()
-#    shell: bash
-#    run: |
-#      # TODO -- waiting on security clearance
-#      # Load configuration values
-#      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
-#
-#      cd "./llvm-ci-perf-results"
-#      git config user.name "SYCL Benchmarking Bot"
-#      git config user.email "sys_sycl_benchmarks@intel.com"
-#      git pull
-#      git add .
-#      # Make sure changes have been made
-#      if git diff --quiet && git diff --cached --quiet; then
-#        echo "No new results added, skipping push."
-#      else
-#        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-#        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
-#      fi
-  - name: Find benchmark result artifact here
+  - name: Push compute-benchmarks results
     if: always()
     shell: bash
     run: |
-      cat << EOF
-      #
-      # Artifact link for benchmark results here:
-      #
-      EOF
-  - name: Archive compute-benchmark results
-    if: always()
-    uses: actions/upload-artifact@v4
-    with:
-      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
-      path: ./artifact
+      # TODO redo configuration
+      # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+
+      cd "./llvm-ci-perf-results"
+      git config user.name "SYCL Benchmarking Bot"
+      git config user.email "sys_sycl_benchmarks@intel.com"
+      git pull
+      git add .
+      # Make sure changes have been made
+      if git diff --quiet && git diff --cached --quiet; then
+        echo "No new results added, skipping push."
+      else
+        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
+        git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
+      fi
+#  - name: Find benchmark result artifact here
+#    if: always()
+#    shell: bash
+#    run: |
+#      cat << EOF
+#      #
+#      # Artifact link for benchmark results here:
+#      #
+#      EOF
+#  - name: Archive compute-benchmark results
+#    if: always()
+#    uses: actions/upload-artifact@v4
+#    with:
+#      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+#      path: ./artifact
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 91f84917f8698..1a15e5407daf3 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -265,11 +265,15 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             this_name, chart_data, failures, options.output_markdown
         )
 
-        with open("benchmark_results.md", "w") as file:
+        md_path = options.output_directory
+        if options.output_directory is None:
+            md_path = os.getcwd()
+
+        with open(os.path.join(md_path, "benchmark_results.md"), "w") as file:
             file.write(markdown_content)
 
         print(
-            f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md"
+            f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md"
         )
 
     saved_name = save_name if save_name is not None else this_name
@@ -283,7 +287,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             compare_names.append(saved_name)
 
     if options.output_html:
-        generate_html(history.runs, compare_names)
+        html_path = options.output_directory
+        if options.output_directory is None:
+            html_path = os.path.join(os.path.dirname(__file__), "html")
+        generate_html(history.runs, compare_names, html_path)
 
 
 def validate_and_parse_env_args(env_args):
@@ -398,6 +405,12 @@ def validate_and_parse_env_args(env_args):
         const=options.output_html,
         choices=["local", "remote"],
     )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Location for output files, if --output-html or --output_markdown was specified.",
+        default=None
+    )
     parser.add_argument(
         "--dry-run",
         help="Do not run any actual benchmarks",
@@ -486,6 +499,10 @@ def validate_and_parse_env_args(env_args):
     if args.compute_runtime is not None:
         options.build_compute_runtime = True
         options.compute_runtime_tag = args.compute_runtime
+    if args.output_dir is not None:
+        if not os.path.isdir(args.output_dir):
+            parser.error("Specified --output-dir is not a valid path")
+        options.output_directory = os.path.abspath(args.output_dir)
 
     benchmark_filter = re.compile(args.filter) if args.filter else None
 
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 7600942acd1e5..332d1615bc78d 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -31,6 +31,7 @@ class Options:
     compare_max: int = 10  # average/median over how many results
     output_markdown: MarkdownSize = MarkdownSize.SHORT
     output_html: str = "local"
+    output_directory: str = None
     dry_run: bool = False
     stddev_threshold: float = 0.02
     iterations_stddev: int = 5
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 53dd4b1e8f968..49b4d1d84a214 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -8,9 +8,7 @@
 from options import options
 
 
-def generate_html(benchmark_runs: list, compare_names: list[str]):
-    # create path to data.js in html folder
-    html_path = os.path.join(os.path.dirname(__file__), "html")
+def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str):
     benchmark_runs.sort(key=lambda run: run.date, reverse=True)
 
     if options.output_html == "local":
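
Taken together, the changes in this patch make the output location configurable: when --output-dir is given it must point to an existing directory, and both benchmark_results.md and the generated HTML data are written there; otherwise the previous defaults are kept (the current working directory for the markdown file, the html/ folder next to the script for the HTML output). A minimal sketch of that fallback logic, assuming the option names shown in the diff:

import os

def resolve_output_paths(output_directory, script_dir):
    # Markdown output falls back to the current working directory.
    md_path = output_directory if output_directory is not None else os.getcwd()
    # HTML output falls back to the html/ folder next to the script.
    html_path = (
        output_directory
        if output_directory is not None
        else os.path.join(script_dir, "html")
    )
    return md_path, html_path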

From ba1297fe66693ef025b2aa6c14ebfc17bf2c3651 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Wed, 12 Mar 2025 09:00:38 -0700
Subject: [PATCH 29/79] [benchmark] Disabling UR test suites

---
 .github/workflows/ur-benchmarks-reusable.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index d7c32edfdfc2a..0aecbffc20fe7 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -161,7 +161,6 @@ jobs:
         taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py
         ~/llvm_bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
-        --ur ${{ github.workspace }}/ur_install
         --adapter ${{ matrix.adapter.str_name }}
         --compare baseline
         --compute-runtime ${{ inputs.compute_runtime_commit }}
@@ -169,6 +168,9 @@ jobs:
         ${{ inputs.upload_report && '--output-html' || '' }}
         ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
         ${{ inputs.bench_script_params }}
+        # Temporarily disabled due to build failures
+        # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849
+        # --ur ${{ github.workspace }}/ur_install
 
     - name: Print benchmark results
       run: |

From cd6097fdef7d77213b7a21658ca2e040fd9cf825 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 13 Mar 2025 11:42:00 +0000
Subject: [PATCH 30/79] update compute benchmarks and fix requirements

---
 devops/scripts/benchmarks/benches/compute.py | 2 +-
 devops/scripts/benchmarks/requirements.txt   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 92818cc00fad2..4b48f16b5fc6b 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -28,7 +28,7 @@ def setup(self):
             self.directory,
             "compute-benchmarks-repo",
             "https://github.com/intel/compute-benchmarks.git",
-            "9369275026229b182bc4a555b73c2ec995a9e2b7",
+            "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba",
         )
         build_path = create_build_path(self.directory, "compute-benchmarks-build")
 
diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
index 99ba0caab55c2..9f0381ceef6c2 100644
--- a/devops/scripts/benchmarks/requirements.txt
+++ b/devops/scripts/benchmarks/requirements.txt
@@ -2,3 +2,4 @@ matplotlib==3.9.2
 mpld3==0.5.10
 dataclasses-json==0.6.7
 PyYAML==6.0.1
+Mako==1.3.9

From c4e92c6ac7a64ae26f9c15ea383473b71637c1e2 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 13 Mar 2025 12:09:48 +0000
Subject: [PATCH 31/79] fix url updates

---
 devops/scripts/benchmarks/html/scripts.js | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 7ba00738e727a..2bd52a70b07c8 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -13,12 +13,13 @@ let timeseriesData, barChartsData, allRunNames;
 let runSelect, selectedRunsDiv, suiteFiltersContainer;
 
 // Run selector functions
-function updateSelectedRuns() {
+function updateSelectedRuns(forceUpdate = true) {
     selectedRunsDiv.innerHTML = '';
     activeRuns.forEach(name => {
         selectedRunsDiv.appendChild(createRunElement(name));
     });
-    updateCharts();
+    if (forceUpdate)
+        updateCharts();
 }
 
 function createRunElement(name) {
@@ -439,7 +440,7 @@ function setupRunSelector() {
         runSelect.appendChild(option);
     });
 
-    updateSelectedRuns();
+    updateSelectedRuns(false);
 }
 
 function setupSuiteFilters() {

From ed8eecce3d20e19f471ec65bb59b851bd215b486 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 13 Mar 2025 12:24:41 +0000
Subject: [PATCH 32/79] use timestamps in result file names

---
 devops/scripts/benchmarks/history.py      | 26 +++++++++--------------
 devops/scripts/benchmarks/utils/result.py |  1 +
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 2b7002ed7faa9..2ed63d129d140 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -13,7 +13,6 @@
 
 
 class BenchmarkHistory:
-    benchmark_run_index_max = 0
     runs = []
 
     def __init__(self, dir):
@@ -35,28 +34,22 @@ def load(self, n: int):
         # Get all JSON files in the results directory
         benchmark_files = list(results_dir.glob("*.json"))
 
-        # Extract index numbers and sort files by index number
-        def extract_index(file_path: Path) -> int:
+        # Extract timestamp and sort files by it
+        def extract_timestamp(file_path: Path) -> str:
             try:
-                return int(file_path.stem.split("_")[0])
-            except (IndexError, ValueError):
-                return -1
+                return file_path.stem.split("_")[-1]
+            except IndexError:
+                return ""
 
-        benchmark_files = [
-            file for file in benchmark_files if extract_index(file) != -1
-        ]
-        benchmark_files.sort(key=extract_index)
+        benchmark_files.sort(key=extract_timestamp, reverse=True)
 
         # Load the first n benchmark files
         benchmark_runs = []
-        for file_path in benchmark_files[n::-1]:
+        for file_path in benchmark_files[:n]:
             benchmark_run = self.load_result(file_path)
             if benchmark_run:
                 benchmark_runs.append(benchmark_run)
 
-        if benchmark_files:
-            self.benchmark_run_index_max = extract_index(benchmark_files[-1])
-
         self.runs = benchmark_runs
 
     def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
@@ -102,10 +95,11 @@ def save(self, save_name, results: list[Result], to_file=True):
         results_dir = Path(os.path.join(self.dir, "results"))
         os.makedirs(results_dir, exist_ok=True)
 
-        self.benchmark_run_index_max += 1
+        # Use formatted timestamp for the filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         file_path = Path(
             os.path.join(
-                results_dir, f"{self.benchmark_run_index_max}_{save_name}.json"
+                results_dir, f"{save_name}_{timestamp}.json"
             )
         )
         with file_path.open("w") as file:
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 4e65a3b8aa582..7d82d9e488edf 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -35,6 +35,7 @@ class Result:
 class BenchmarkRun:
     results: list[Result]
     name: str = "This PR"
+    hostname: str = "Unknown"
     git_hash: str = ""
     github_repo: str = None
     date: datetime = field(
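
With this patch, result files are keyed by save name plus a timestamp rather than by a running index, so the newest runs can be selected purely from the file names. A rough sketch of the naming scheme and the newest-first selection, assuming the %Y%m%d_%H%M%S format used above (the timestamp itself contains an underscore, so the sketch keeps the last two underscore-separated fields of the stem as the sort key):

from datetime import datetime
from pathlib import Path

def result_filename(save_name: str) -> str:
    # e.g. "baseline_20250313_122441.json"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{save_name}_{timestamp}.json"

def newest_runs(results_dir: Path, n: int) -> list[Path]:
    # Sort by the trailing "YYYYMMDD_HHMMSS" portion of the stem, newest first.
    def key(path: Path) -> str:
        return "_".join(path.stem.split("_")[-2:])
    return sorted(results_dir.glob("*.json"), key=key, reverse=True)[:n]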

From 130212d2a2e0b1045605033a09412f430d13721a Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 13 Mar 2025 12:29:36 +0000
Subject: [PATCH 33/79] add hostname to benchmark run

---
 devops/scripts/benchmarks/history.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 2ed63d129d140..d1bdc3bfdb940 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -6,6 +6,7 @@
 import os
 import json
 from pathlib import Path
+import socket
 from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
@@ -82,6 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
             github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
+            hostname=socket.gethostname()
         )
 
     def save(self, save_name, results: list[Result], to_file=True):
@@ -132,6 +134,7 @@ def compute_average(self, data: list[BenchmarkRun]):
             name=first_run.name,
             git_hash="average",
             date=first_run.date,  # should this be different?
+            hostname=first_run.hostname
         )
 
         return average_benchmark_run
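
Recording the hostname makes it possible to tell apart results produced on different runners once they land in the shared results repository. A minimal, self-contained sketch of how a run could be tagged (the dictionary below is illustrative, not the actual BenchmarkRun type):

import socket
from datetime import datetime, timezone

run_metadata = {
    "hostname": socket.gethostname(),
    "date": datetime.now(tz=timezone.utc).isoformat(),
    "name": "baseline",
}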

From 5323386c59d2457d79d1fee27b55dffc93be74a3 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 13 Mar 2025 17:00:23 +0000
Subject: [PATCH 34/79] add SubmitGraph benchmark

... and apply black formatting.
---
 devops/scripts/benchmarks/benches/compute.py | 70 +++++++++++++++++---
 devops/scripts/benchmarks/history.py         | 10 +--
 devops/scripts/benchmarks/main.py            |  2 +-
 devops/scripts/benchmarks/options.py         |  1 +
 devops/scripts/benchmarks/presets.py         |  2 +-
 5 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 1f335cd8838ec..bc9d1d9d80d8a 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -28,7 +28,7 @@ def setup(self):
             self.directory,
             "compute-benchmarks-repo",
             "https://github.com/intel/compute-benchmarks.git",
-            "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba",
+            "b5cc46acf61766ab00da04e85bd4da4f7591eb21",
         )
         build_path = create_build_path(self.directory, "compute-benchmarks-build")
 
@@ -87,6 +87,19 @@ def benchmarks(self) -> list[Benchmark]:
             UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1),
         ]
 
+        for in_order_queue in [0, 1]:
+            for num_kernels in [4, 32]:
+                for measure_completion_time in [0, 1]:
+                    benches.append(
+                        GraphApiSubmitGraph(
+                            self,
+                            RUNTIMES.SYCL,
+                            in_order_queue,
+                            num_kernels,
+                            measure_completion_time,
+                        )
+                    )
+
         if options.ur is not None:
             benches += [
                 SubmitKernelUR(self, 0, 0),
@@ -536,14 +549,46 @@ def bin_args(self) -> list[str]:
             "--immediateAppendCmdList=0",
         ]
 
+
+class GraphApiSubmitGraph(ComputeBenchmark):
+    def __init__(
+        self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime
+    ):
+        self.inOrderQueue = inOrderQueue
+        self.numKernels = numKernels
+        self.runtime = runtime
+        self.measureCompletionTime = measureCompletionTime
+        super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph")
+
+    def explicit_group(self):
+        return f"SubmitGraph {self.numKernels}"
+
+    def description(self) -> str:
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution."
+        )
+
+    def name(self):
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--NumKernels={self.numKernels}",
+            f"--MeasureCompletionTime={self.measureCompletionTime}",
+            f"--InOrderQueue={self.inOrderQueue}",
+            "--Profiling=0",
+            "--KernelExecutionTime=1",
+        ]
+
+
 class UllsEmptyKernel(ComputeBenchmark):
     def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
         self.wgc = wgc
         self.wgs = wgs
         self.runtime = runtime
-        super().__init__(
-            bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel"
-        )
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel")
 
     def explicit_group(self):
         return f"EmptyKernel {self.wgc} {self.wgs}"
@@ -561,8 +606,19 @@ def bin_args(self) -> list[str]:
             f"--wgc={self.wgs}",
         ]
 
+
 class UllsKernelSwitch(ComputeBenchmark):
-    def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        count,
+        kernelTime,
+        barrier,
+        hostVisible,
+        ioq,
+        ctrBasedEvents,
+    ):
         self.count = count
         self.kernelTime = kernelTime
         self.barrier = barrier
@@ -570,9 +626,7 @@ def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVis
         self.ctrBasedEvents = ctrBasedEvents
         self.runtime = runtime
         self.ioq = ioq
-        super().__init__(
-            bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch"
-        )
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch")
 
     def explicit_group(self):
         return f"KernelSwitch {self.count} {self.kernelTime}"
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index d1bdc3bfdb940..f05e0192d26ee 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -83,7 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
             github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
-            hostname=socket.gethostname()
+            hostname=socket.gethostname(),
         )
 
     def save(self, save_name, results: list[Result], to_file=True):
@@ -99,11 +99,7 @@ def save(self, save_name, results: list[Result], to_file=True):
 
         # Use formatted timestamp for the filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        file_path = Path(
-            os.path.join(
-                results_dir, f"{save_name}_{timestamp}.json"
-            )
-        )
+        file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
         with file_path.open("w") as file:
             json.dump(serialized, file, indent=4)
         print(f"Benchmark results saved to {file_path}")
@@ -134,7 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]):
             name=first_run.name,
             git_hash="average",
             date=first_run.date,  # should this be different?
-            hostname=first_run.hostname
+            hostname=first_run.hostname,
         )
 
         return average_benchmark_run
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 1a15e5407daf3..1d7304ea5e212 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -409,7 +409,7 @@ def validate_and_parse_env_args(env_args):
         "--output-dir",
         type=str,
         help="Location for output files, if --output-html or --output_markdown was specified.",
-        default=None
+        default=None,
     )
     parser.add_argument(
         "--dry-run",
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 332d1615bc78d..ced76a5d692f2 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -3,6 +3,7 @@
 
 from presets import presets
 
+
 class Compare(Enum):
     LATEST = "latest"
     AVERAGE = "average"
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index 7f5dc8d78460a..e394a8b4b622e 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -30,9 +30,9 @@
     ],
 }
 
+
 def enabled_suites(preset: str) -> list[str]:
     try:
         return presets[preset]
     except KeyError:
         raise ValueError(f"Preset '{preset}' not found.")
-

From 5bd1d568a4371041dab01e071349a2d392c409ba Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 13 Mar 2025 12:07:11 -0700
Subject: [PATCH 35/79] Restore sycl-linux-run-tests benchmarking action

---
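Note: the restored action pins the benchmark run to the first NUMA node via
taskset. A hedged Python rendering (Linux-only, for illustration; the action
itself uses the lscpu/awk shell snippet shown in the diff) of what that snippet
computes:

```python
import re
import subprocess

def first_numa_core_range() -> str:
    """Return the CPU range of NUMA node 0, skipping cores 0-3 (e.g. '0-55,...' -> '4-55')."""
    out = subprocess.run(["lscpu"], capture_output=True, text=True).stdout
    line = ""
    for l in out.splitlines():
        if "NUMA node0 CPU" in l or "On-line CPU" in l:
            line = l                  # keep the last match, like the awk END block
    cpus = line.split()[3]            # e.g. "0-55,112-167"
    first = cpus.split(",")[0]        # "0-55"
    return re.sub(r"^0", "4", first)  # skip the first 4 cores

print(first_numa_core_range())        # exported as CORES and consumed by `taskset -c`
```
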
 devops/actions/run-tests/benchmark/action.yml | 106 +++++++++++-------
 1 file changed, 68 insertions(+), 38 deletions(-)

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 7f69fdf832982..f90808f730787 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -27,16 +27,25 @@ runs:
     shell: bash
     env:
       TARGET_DEVICE: ${{ inputs.target_devices }}
+      RUNNER_NAME: ${{ runner.name }}
     run: |
       case "$RUNNER_TAG" in
-        '["Linux", "gen12"]' | '["Linux", "pvc"]') ;;
+        '["PVC_PERF"]' ) ;;
         *)
           echo "#"
-          echo "# WARNING: Only gen12/pvc on Linux is fully supported."
+          echo "# WARNING: Only specific tuned runners are fully supported."
           echo "# This workflow is not guaranteed to work with other runners."
           echo "#" ;;
       esac
 
+      # Ensure runner name has nothing injected
+      # TODO: in terms of security, is this overkill?
+      if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
+          echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
+          exit 1
+      fi
+      echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV 
+
       # input.target_devices is not directly used, as this allows code injection
       case "$TARGET_DEVICE" in
         level_zero:*) ;;
@@ -46,37 +55,58 @@ runs:
           echo "# This workflow is not guaranteed to work with other backends."
           echo "#" ;;
       esac
-  - name: Run compute-benchmarks
+      echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV 
+
+  - name: Compute CPU core range to run benchmarks on
     shell: bash
     run: |
-      cat << EOF
-      #
-      # NOTE TO DEVELOPERS:
-      #
-
-      Check latter steps of the workflow: This job produces an artifact with:
-        - benchmark results from passing/failing tests
-        - log containing all failing (too slow) benchmarks
-        - log containing all erroring benchmarks
+      # Compute the core range for the first NUMA node; second node is used by
+      # UMF. Skip the first 4 cores as the kernel is likely to schedule more
+      # work on these.
+      CORES="$(lscpu | awk '
+        /NUMA node0 CPU|On-line CPU/ {line=$0}
+        END {
+          split(line, a, " ")
+          split(a[4], b, ",")
+          sub(/^0/, "4", b[1])
+          print b[1]
+        }')"
+      echo "CPU core range to use: $CORES"
+      echo "CORES=$CORES" >> $GITHUB_ENV
 
-      While this step in the workflow provides debugging output describing this
-      information, it might be easier to inspect the logs from the artifact
-      instead.
-
-      EOF
-      export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}"
+      ZE_AFFINITY_MASK=0
+      echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+  - name: Checkout results repo
+    shell: bash
+    run: |
+      git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
+  - name: Run compute-benchmarks
+    shell: bash
+    run: |
+      # TODO generate summary + display helpful message here
       export CMPLR_ROOT=./toolchain
       echo "-----"
       sycl-ls
       echo "-----"
-      ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
+      pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
+      echo "-----"
+      mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
+        "$(realpath ./llvm_test_workdir)" \
+        --sycl "$(realpath ./toolchain)" \
+        --save baseline \
+        --output-html remote \
+        --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --preset Minimal
+      echo "-----"
+      ls
   - name: Push compute-benchmarks results
     if: always()
     shell: bash
     run: |
-      # TODO -- waiting on security clearance
-      # Load configuration values
-      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+      # TODO redo configuration
+      # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
 
       cd "./llvm-ci-perf-results"
       git config user.name "SYCL Benchmarking Bot"
@@ -88,20 +118,20 @@ runs:
         echo "No new results added, skipping push."
       else
         git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
+        git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
       fi
-  - name: Find benchmark result artifact here
-    if: always()
-    shell: bash
-    run: |
-      cat << EOF
-      #
-      # Artifact link for benchmark results here:
-      #
-      EOF
-  - name: Archive compute-benchmark results
-    if: always()
-    uses: actions/upload-artifact@v4
-    with:
-      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
-      path: ./artifact
+#  - name: Find benchmark result artifact here
+#    if: always()
+#    shell: bash
+#    run: |
+#      cat << EOF
+#      #
+#      # Artifact link for benchmark results here:
+#      #
+#      EOF
+#  - name: Archive compute-benchmark results
+#    if: always()
+#    uses: actions/upload-artifact@v4
+#    with:
+#      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+#      path: ./artifact

From e9b1375dd0075dd053839370d5fc3bcf95cc4390 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 13 Mar 2025 12:36:55 -0700
Subject: [PATCH 36/79] Restore old SYCL benchmarking CI

---
 .github/workflows/sycl-linux-run-tests.yml    |  10 ++
 devops/actions/run-tests/benchmark/action.yml |  88 +++++-------
 .../actions/run-tests/benchmark_v2/action.yml | 134 ++++++++++++++++++
 3 files changed, 183 insertions(+), 49 deletions(-)
 create mode 100644 devops/actions/run-tests/benchmark_v2/action.yml

diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index c30c5eccbcb62..f5b243cb7fc05 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -155,6 +155,7 @@ on:
           - e2e
           - cts
           - compute-benchmarks
+          - benchmark_v2
 
       env:
         description: |
@@ -330,3 +331,12 @@ jobs:
       env:
         RUNNER_TAG: ${{ inputs.runner }}
         GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+
+    - name: Run benchmarks
+      if: inputs.tests_selector == 'benchmark_v2'
+      uses: ./devops/actions/run-tests/benchmark_v2
+      with:
+        target_devices: ${{ inputs.target_devices }}
+      env:
+        RUNNER_TAG: ${{ inputs.runner }}
+        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
\ No newline at end of file
diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index f90808f730787..03b7d4ad776fd 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -27,25 +27,16 @@ runs:
     shell: bash
     env:
       TARGET_DEVICE: ${{ inputs.target_devices }}
-      RUNNER_NAME: ${{ runner.name }}
     run: |
       case "$RUNNER_TAG" in
-        '["PVC_PERF"]' ) ;;
+        '["Linux", "gen12"]' | '["Linux", "pvc"]') ;;
         *)
           echo "#"
-          echo "# WARNING: Only specific tuned runners are fully supported."
+          echo "# WARNING: Only gen12/pvc on Linux is fully supported."
           echo "# This workflow is not guaranteed to work with other runners."
           echo "#" ;;
       esac
 
-      # Ensure runner name has nothing injected
-      # TODO: in terms of security, is this overkill?
-      if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
-          echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
-          exit 1
-      fi
-      echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV 
-
       # input.target_devices is not directly used, as this allows code injection
       case "$TARGET_DEVICE" in
         level_zero:*) ;;
@@ -55,11 +46,11 @@ runs:
           echo "# This workflow is not guaranteed to work with other backends."
           echo "#" ;;
       esac
-      echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV 
-
   - name: Compute CPU core range to run benchmarks on
     shell: bash
     run: |
+      # Taken from ur-benchmark-reusable.yml:
+
       # Compute the core range for the first NUMA node; second node is used by
       # UMF. Skip the first 4 cores as the kernel is likely to schedule more
       # work on these.
@@ -76,37 +67,36 @@ runs:
 
       ZE_AFFINITY_MASK=0
       echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
-  - name: Checkout results repo
-    shell: bash
-    run: |
-      git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
   - name: Run compute-benchmarks
     shell: bash
     run: |
-      # TODO generate summary + display helpful message here
+      cat << EOF
+      #
+      # NOTE TO DEVELOPERS:
+      #
+
+      Check latter steps of the workflow: This job produces an artifact with:
+        - benchmark results from passing/failing tests
+        - log containing all failing (too slow) benchmarks
+        - log containing all erroring benchmarks
+
+      While this step in the workflow provides debugging output describing this
+      information, it might be easier to inspect the logs from the artifact
+      instead.
+
+      EOF
+      export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}"
       export CMPLR_ROOT=./toolchain
       echo "-----"
       sycl-ls
       echo "-----"
-      pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-      echo "-----"
-      mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
-        "$(realpath ./llvm_test_workdir)" \
-        --sycl "$(realpath ./toolchain)" \
-        --save baseline \
-        --output-html remote \
-        --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
-        --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
-        --preset Minimal
-      echo "-----"
-      ls
+      taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
   - name: Push compute-benchmarks results
     if: always()
     shell: bash
     run: |
-      # TODO redo configuration
-      # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+      # Load configuration values
+      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
 
       cd "./llvm-ci-perf-results"
       git config user.name "SYCL Benchmarking Bot"
@@ -118,20 +108,20 @@ runs:
         echo "No new results added, skipping push."
       else
         git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-        git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
+        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
       fi
-#  - name: Find benchmark result artifact here
-#    if: always()
-#    shell: bash
-#    run: |
-#      cat << EOF
-#      #
-#      # Artifact link for benchmark results here:
-#      #
-#      EOF
-#  - name: Archive compute-benchmark results
-#    if: always()
-#    uses: actions/upload-artifact@v4
-#    with:
-#      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
-#      path: ./artifact
+  - name: Find benchmark result artifact here
+    if: always()
+    shell: bash
+    run: |
+      cat << EOF
+      #
+      # Artifact link for benchmark results here:
+      #
+      EOF
+  - name: Archive compute-benchmark results
+    if: always()
+    uses: actions/upload-artifact@v4
+    with:
+      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+      path: ./artifact
diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml
new file mode 100644
index 0000000000000..375bc20faf857
--- /dev/null
+++ b/devops/actions/run-tests/benchmark_v2/action.yml
@@ -0,0 +1,134 @@
+name: 'Run benchmarks'
+
+# This action assumes the following prerequisites:
+#
+# - SYCL is placed in ./toolchain -- TODO change this
+# - /devops has been checked out in ./devops.
+# - env.GITHUB_TOKEN was properly set, because according to Github, that's
+#   apparently the recommended way to pass a secret into a github action:
+
+#   https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
+#
+# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently,
+#   only specific runners are fully supported.
+
+inputs:
+  target_devices:
+    type: string
+    required: True
+
+runs:
+  using: "composite"
+  steps:
+  - name: Check specified runner type / target backend
+    shell: bash
+    env:
+      TARGET_DEVICE: ${{ inputs.target_devices }}
+      RUNNER_NAME: ${{ runner.name }}
+    run: |
+      case "$RUNNER_TAG" in
+        '["PVC_PERF"]' ) ;;
+        *)
+          echo "#"
+          echo "# WARNING: Only specific tuned runners are fully supported."
+          echo "# This workflow is not guaranteed to work with other runners."
+          echo "#" ;;
+      esac
+
+      # Ensure runner name has nothing injected
+      # TODO: in terms of security, is this overkill?
+      if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
+          echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
+          exit 1
+      fi
+      echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV 
+
+      # input.target_devices is not directly used, as this allows code injection
+      case "$TARGET_DEVICE" in
+        level_zero:*) ;;
+        *)
+          echo "#"
+          echo "# WARNING: Only level_zero backend is fully supported."
+          echo "# This workflow is not guaranteed to work with other backends."
+          echo "#" ;;
+      esac
+      echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV 
+
+  - name: Compute CPU core range to run benchmarks on
+    shell: bash
+    run: |
+      # Compute the core range for the first NUMA node; second node is used by
+      # UMF. Skip the first 4 cores as the kernel is likely to schedule more
+      # work on these.
+      CORES="$(lscpu | awk '
+        /NUMA node0 CPU|On-line CPU/ {line=$0}
+        END {
+          split(line, a, " ")
+          split(a[4], b, ",")
+          sub(/^0/, "4", b[1])
+          print b[1]
+        }')"
+      echo "CPU core range to use: $CORES"
+      echo "CORES=$CORES" >> $GITHUB_ENV
+
+      ZE_AFFINITY_MASK=0
+      echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+  - name: Checkout results repo
+    shell: bash
+    run: |
+      git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
+  - name: Run compute-benchmarks
+    shell: bash
+    run: |
+      # TODO generate summary + display helpful message here
+      export CMPLR_ROOT=./toolchain
+      echo "-----"
+      sycl-ls
+      echo "-----"
+      pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
+      echo "-----"
+      mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
+        "$(realpath ./llvm_test_workdir)" \
+        --sycl "$(realpath ./toolchain)" \
+        --save baseline \
+        --output-html remote \
+        --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+        --preset Minimal
+      echo "-----"
+      ls
+  - name: Push compute-benchmarks results
+    if: always()
+    shell: bash
+    run: |
+      # TODO redo configuration
+      # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+
+      cd "./llvm-ci-perf-results"
+      git config user.name "SYCL Benchmarking Bot"
+      git config user.email "sys_sycl_benchmarks@intel.com"
+      git pull
+      git add .
+      # Make sure changes have been made
+      if git diff --quiet && git diff --cached --quiet; then
+        echo "No new results added, skipping push."
+      else
+        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
+        git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
+      fi
+#  - name: Find benchmark result artifact here
+#    if: always()
+#    shell: bash
+#    run: |
+#      cat << EOF
+#      #
+#      # Artifact link for benchmark results here:
+#      #
+#      EOF
+#  - name: Archive compute-benchmark results
+#    if: always()
+#    uses: actions/upload-artifact@v4
+#    with:
+#      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+#      path: ./artifact

From a3edf7aff115c3ebb64c90afe042a177ad4ea2c4 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 13 Mar 2025 12:47:26 -0700
Subject: [PATCH 37/79] Add benchmarking results to sycl-docs.yml

---
 .github/workflows/sycl-docs.yml          | 1 +
 devops/scripts/benchmarks/html/config.js | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml
index 5c1e8e425111b..a45c56bdd869c 100644
--- a/.github/workflows/sycl-docs.yml
+++ b/.github/workflows/sycl-docs.yml
@@ -49,6 +49,7 @@ jobs:
         mkdir clang
         mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* .
         mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/
+        cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks
         touch .nojekyll
     # Upload the generated docs as an artifact and deploy to GitHub Pages.
     - name: Upload artifact
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
index 3e67ae1dce8e5..0a8551c5de152 100644
--- a/devops/scripts/benchmarks/html/config.js
+++ b/devops/scripts/benchmarks/html/config.js
@@ -1,2 +1,2 @@
-//remoteDataUrl = 'https://example.com/data.json';
+remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json';
 //defaultCompareNames = ['baseline'];

From 6620e4a889664a031414af2107e423f9b7e60169 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= <lukasz.stolarczuk@intel.com>
Date: Thu, 13 Mar 2025 21:20:44 +0100
Subject: [PATCH 38/79] [CI] Bump compute bench (#17431)

- [x] remove the second, test commit
---
 .github/workflows/ur-benchmarks-reusable.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index 0aecbffc20fe7..d7c32edfdfc2a 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -161,6 +161,7 @@ jobs:
         taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py
         ~/llvm_bench_workdir
         --sycl ${{ github.workspace }}/sycl_build
+        --ur ${{ github.workspace }}/ur_install
         --adapter ${{ matrix.adapter.str_name }}
         --compare baseline
         --compute-runtime ${{ inputs.compute_runtime_commit }}
@@ -168,9 +169,6 @@ jobs:
         ${{ inputs.upload_report && '--output-html' || '' }}
         ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
         ${{ inputs.bench_script_params }}
-        # Temporarily disabled due to build faiures
-        # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849
-        # --ur ${{ github.workspace }}/ur_install
 
     - name: Print benchmark results
       run: |

From f4a2e39ad21e498d090fcacf62e519574a3cc0b6 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 13 Mar 2025 16:03:50 -0700
Subject: [PATCH 39/79] Initial implementation of unified benchmark workflow

---
 .github/workflows/benchmark.yml | 122 ++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000000000..3837b119a10e3
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,122 @@
+name: Run Benchmarks
+
+on:
+  schedule:
+    - cron: '0 1 * * *'  # 2 hrs earlier than sycl-nightly.yml
+  workflow_call:
+    inputs:
+      pr_no:
+        type: number
+        required: false
+      upload_results:
+        type: bool
+        required: true
+      runner:
+        type: string
+        required: true
+      backend:
+        type: string
+        required: true
+      reset_intel_gpu:
+        type: bool
+        required: true
+        default: true
+
+  workflow_dispatch:
+    inputs:
+      pr_no:
+        description: Specific PR no. to build
+        type: number
+        required: false
+      upload_results:
+        description: 'Save and upload results'
+        type: choice
+        options:
+          - false
+          - true
+        default: true
+      runner:
+        type: choice
+        options:
+          - '["PVC_PERF"]'
+      backend:
+        description: Backend to use
+        type: choice
+        options:
+          - 'level_zero:gpu'
+        # TODO L0 V2 support
+      reset_intel_gpu:
+        description: Reset Intel GPUs
+        type: choice
+        options:
+          - false
+          - true
+        default: true
+
+permissions:
+  contents: read
+  packages: read
+
+jobs:
+  build_sycl:
+    name: Build SYCL from PR
+    if: inputs.pr_no != null
+    uses: ./.github/workflows/sycl-linux-build.yml
+    with:
+      build_ref: "origin/pr/${{ inputs.pr_no }}/merge"
+      build_cache_root: "/__w/"
+      build_artifact_suffix: "default"
+      build_cache_suffix: "default"
+      # Docker image has last nightly pre-installed and added to the PATH
+      build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest"
+      cc: clang
+      cxx: clang++
+
+  run_benchmarks_build:
+    name: Run Benchmarks (on PR Build)
+    needs: [ build_sycl ]
+    if: inputs.pr_no != null
+    strategy:
+      matrix:
+        # Set default values if not specified:
+        include:
+          - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
+            backend: ${{ inputs.backend || 'level_zero:gpu' }}
+            reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }}
+            ref: origin/pr/${{ inputs.pr_no }}/merge
+    uses: ./.github/workflows/sycl-linux-run-tests.yml
+    secrets: inherit
+    with:
+      # TODO support other benchmarks
+      name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }})
+      runner: ${{ matrix.runner }}
+      image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest
+      image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
+      target_devices: ${{ matrix.backend }}
+      reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
+      tests_selector: benchmark_v2
+      repo_ref: ${{ matrix.ref }}
+      sycl_toolchain_artifact: sycl_linux_default
+      sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }}
+      sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }}
+
+  run_benchmarks_nightly:
+    name: Run Benchmarks (on Nightly Build)
+    if: inputs.pr_no == 0
+    strategy:
+      matrix:
+        # Set default values if not specified:
+        include:
+          - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
+            backend: ${{ inputs.backend || 'level_zero:gpu' }}
+            reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }}
+    uses: ./.github/workflows/sycl-linux-run-tests.yml
+    with:
+      # TODO support other benchmarks
+      name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }})
+      runner: ${{ matrix.runner }}
+      image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest
+      image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
+      target_devices: ${{ matrix.backend }}
+      reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
+      tests_selector: benchmark_v2
\ No newline at end of file

From 38394bb5bff746d9b6e57da0f99d91d530412641 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Thu, 13 Mar 2025 16:11:56 -0700
Subject: [PATCH 40/79] [CI] Use commit hash instead, fix issues with run

---
 .github/workflows/benchmark.yml | 39 ++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 3837b119a10e3..f044cbb066757 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -5,11 +5,11 @@ on:
     - cron: '0 1 * * *'  # 2 hrs earlier than sycl-nightly.yml
   workflow_call:
     inputs:
-      pr_no:
-        type: number
+      commit_hash:
+        type: string
         required: false
       upload_results:
-        type: bool
+        type: string # true/false: workflow_dispatch does not support booleans
         required: true
       runner:
         type: string
@@ -18,16 +18,17 @@ on:
         type: string
         required: true
       reset_intel_gpu:
-        type: bool
+        type: string  # true/false: workflow_dispatch does not support booleans
         required: true
         default: true
 
   workflow_dispatch:
     inputs:
-      pr_no:
-        description: Specific PR no. to build
-        type: number
+      commit_hash:
+        description: Commit hash to build intel/llvm from
+        type: string
         required: false
+        default: ''
       upload_results:
         description: 'Save and upload results'
         type: choice
@@ -53,17 +54,15 @@ on:
           - true
         default: true
 
-permissions:
-  contents: read
-  packages: read
+permissions: read-all
 
 jobs:
   build_sycl:
     name: Build SYCL from PR
-    if: inputs.pr_no != null
+    if: inputs.commit_hash != ''
     uses: ./.github/workflows/sycl-linux-build.yml
     with:
-      build_ref: "origin/pr/${{ inputs.pr_no }}/merge"
+      build_ref: ${{ inputs.commit_hash }}
       build_cache_root: "/__w/"
       build_artifact_suffix: "default"
       build_cache_suffix: "default"
@@ -71,19 +70,20 @@ jobs:
       build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest"
       cc: clang
       cxx: clang++
+      changes: '[]'
 
   run_benchmarks_build:
     name: Run Benchmarks (on PR Build)
     needs: [ build_sycl ]
-    if: inputs.pr_no != null
+    if: inputs.commit_hash != ''
     strategy:
       matrix:
         # Set default values if not specified:
         include:
           - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
             backend: ${{ inputs.backend || 'level_zero:gpu' }}
-            reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }}
-            ref: origin/pr/${{ inputs.pr_no }}/merge
+            reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }}
+            ref: ${{ inputs.commit_hash }}
     uses: ./.github/workflows/sycl-linux-run-tests.yml
     secrets: inherit
     with:
@@ -96,21 +96,23 @@ jobs:
       reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
       tests_selector: benchmark_v2
       repo_ref: ${{ matrix.ref }}
+      devops_ref: ${{ github.ref }}
       sycl_toolchain_artifact: sycl_linux_default
       sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }}
       sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }}
 
   run_benchmarks_nightly:
     name: Run Benchmarks (on Nightly Build)
-    if: inputs.pr_no == 0
+    if: inputs.commit_hash == ''
     strategy:
       matrix:
         # Set default values if not specified:
         include:
           - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
             backend: ${{ inputs.backend || 'level_zero:gpu' }}
-            reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }}
+            reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }}
     uses: ./.github/workflows/sycl-linux-run-tests.yml
+    secrets: inherit
     with:
       # TODO support other benchmarks
       name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }})
@@ -119,4 +121,5 @@ jobs:
       image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
       target_devices: ${{ matrix.backend }}
       reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
-      tests_selector: benchmark_v2
\ No newline at end of file
+      tests_selector: benchmark_v2
+      repo_ref: ${{ github.ref }}

From f232b93cec0f35c07e2c2ac416bc7699523b0496 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 10:59:56 +0100
Subject: [PATCH 41/79] add benchmark metadata

---
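Note (sketch only, not the repo code): the metadata model introduced here lets
each benchmark or chart group carry a description, an informational note, and an
"unstable" warning that the HTML report hides by default. Field names follow
BenchmarkMetadata in utils/result.py; the entries below are abbreviated examples.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class BenchmarkMetadata:
    type: str = "benchmark"            # "benchmark" for a single result, "group" for a chart group
    description: Optional[str] = None
    notes: Optional[str] = None        # rendered as an informational banner
    unstable: Optional[str] = None     # if set, hidden unless "show unstable" is enabled

metadata = {
    "SubmitKernel": BenchmarkMetadata(
        type="group",
        description="Measures CPU time overhead of submitting kernels through different APIs.",
    ),
    "SinKernelGraph": BenchmarkMetadata(
        type="group",
        unstable="Combines eager and graph execution; may not represent real use cases.",
    ),
}
print(metadata["SinKernelGraph"].unstable)
```
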
 devops/scripts/benchmarks/benches/base.py    |  25 ++-
 devops/scripts/benchmarks/benches/compute.py |  29 +++-
 devops/scripts/benchmarks/benches/test.py    |  40 +++--
 devops/scripts/benchmarks/html/index.html    | 121 +++++++++++++-
 devops/scripts/benchmarks/html/scripts.js    | 158 ++++++++++++++++---
 devops/scripts/benchmarks/main.py            |  21 ++-
 devops/scripts/benchmarks/output_html.py     |  22 ++-
 devops/scripts/benchmarks/utils/result.py    |   9 ++
 8 files changed, 376 insertions(+), 49 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 77365220dbf85..1135a267864a9 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -6,7 +6,7 @@
 import os
 import shutil
 from pathlib import Path
-from utils.result import Result
+from utils.result import BenchmarkMetadata, Result
 from options import options
 from utils.utils import download, run
 import urllib.request
@@ -78,6 +78,9 @@ def download(
     def name(self):
         raise NotImplementedError()
 
+    def description(self):
+        return "No description provided."
+
     def lower_is_better(self):
         return True
 
@@ -96,6 +99,23 @@ def stddev_threshold(self):
     def get_suite_name(self) -> str:
         return self.suite.name()
 
+    def result_names(self) -> list[str]:
+        return [self.name()]
+
+    def notes(self) -> str:
+        return None
+
+    def unstable(self) -> str:
+        return None
+
+    def get_metadata(self) -> BenchmarkMetadata:
+        return BenchmarkMetadata(
+            type='benchmark',
+            description=self.description(),
+            notes=self.notes(),
+            unstable=self.unstable(),
+        )
+
 
 class Suite:
     def benchmarks(self) -> list[Benchmark]:
@@ -106,3 +126,6 @@ def name(self) -> str:
 
     def setup(self):
         return
+
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {}
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index bc9d1d9d80d8a..67ec0bf2087ff 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -8,7 +8,7 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from utils.result import Result
+from utils.result import BenchmarkMetadata, Result
 from options import options
 from enum import Enum
 
@@ -54,6 +54,23 @@ def setup(self):
 
         self.built = True
 
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {
+            "SubmitKernel" : BenchmarkMetadata(
+                type="group",
+                description="Measures CPU time overhead of submitting kernels through different APIs.",
+                notes="Each layer builds on top of the previous layer, adding functionality and overhead. "
+                      "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. "
+                      "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance. "
+                      "Work is ongoing to reduce the overhead of the SYCL API.",
+            ),
+            "SinKernelGraph" : BenchmarkMetadata(
+                type="group",
+                unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
+            ),
+        }
+
+
     def benchmarks(self) -> list[Benchmark]:
         if options.sycl is None:
             return []
@@ -106,14 +123,7 @@ def benchmarks(self) -> list[Benchmark]:
                 SubmitKernelUR(self, 1, 0),
                 SubmitKernelUR(self, 1, 1),
                 MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
-                MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
-                MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
-                MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
                 MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
-                MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
-                MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
-                MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
-                MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0),
                 MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
                 GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
                 GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
@@ -540,6 +550,9 @@ def description(self) -> str:
     def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}"
 
+    def unstable(self) -> str:
+        return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 18794d4e9c73c..e7451e24f25cf 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -6,7 +6,7 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from utils.result import Result
+from utils.result import BenchmarkMetadata, Result
 from utils.utils import run, create_build_path
 from options import options
 import os
@@ -24,33 +24,49 @@ def name(self) -> str:
 
     def benchmarks(self) -> list[Benchmark]:
         bench_configs = [
-            ("Memory Bandwidth", 2000, 200, "Foo Group"),
-            ("Latency", 100, 20, "Bar Group"),
-            ("Throughput", 1500, 150, "Foo Group"),
-            ("FLOPS", 3000, 300, "Foo Group"),
-            ("Cache Miss Rate", 250, 25, "Bar Group"),
+            ("Memory Bandwidth", 2000, 200, "Foo Group", None, None),
+            ("Latency", 100, 20, "Bar Group", "A Latency test note!", None),
+            ("Throughput", 1500, 150, "Foo Group", None, None),
+            ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"),
+            ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"),
         ]
 
         result = []
-        for base_name, base_value, base_diff, group in bench_configs:
+        for base_name, base_value, base_diff, group, notes, unstable in bench_configs:
             for variant in range(6):
                 value_multiplier = 1.0 + (variant * 0.2)
                 name = f"{base_name} {variant+1}"
                 value = base_value * value_multiplier
                 diff = base_diff * value_multiplier
 
-                result.append(TestBench(self, name, value, diff, group))
+                result.append(TestBench(self, name, value, diff, group, notes, unstable))
 
         return result
 
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {
+            "Foo Group" : BenchmarkMetadata(
+                type="group",
+                description="This is a test benchmark for Foo Group.",
+                notes="This is a test note for Foo Group.",
+            ),
+            "Bar Group" : BenchmarkMetadata(
+                type="group",
+                description="This is a test benchmark for Bar Group.",
+                unstable="This is an unstable note for Bar Group.",
+            ),
+        }
+
 
 class TestBench(Benchmark):
-    def __init__(self, suite, name, value, diff, group=""):
+    def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None):
         super().__init__("", suite)
         self.bname = name
         self.value = value
         self.diff = diff
         self.group = group
+        self.notes_text = notes
+        self.unstable_text = unstable
 
     def name(self):
         return self.bname
@@ -64,6 +80,12 @@ def setup(self):
     def description(self) -> str:
         return f"This is a test benchmark for {self.bname}."
 
+    def notes(self) -> str:
+        return self.notes_text
+
+    def unstable(self) -> str:
+        return self.unstable_text
+
     def run(self, env_vars) -> list[Result]:
         random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
         return [
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index c10844f15c707..c40174b7f35a0 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -171,7 +171,98 @@
         .extra-info-entry em {
             color: #555;
         }
-</style>
+        .display-options-container {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .display-options-container label {
+            margin: 0 12px;
+            cursor: pointer;
+        }
+        .display-options-container input {
+            margin-right: 8px;
+        }
+        .benchmark-note {
+            background-color: #cfe2ff;
+            color: #084298;
+            padding: 10px;
+            margin-bottom: 10px;
+            border-radius: 5px;
+            border-left: 4px solid #084298;
+        }
+        .benchmark-unstable {
+            background-color: #f8d7da;
+            color: #842029;
+            padding: 10px;
+            margin-bottom: 10px;
+            border-radius: 5px;
+            border-left: 4px solid #842029;
+        }
+        .note-text {
+            color: #084298;
+        }
+        .unstable-warning {
+            color: #842029;
+            font-weight: bold;
+        }
+        .unstable-text {
+            color: #842029;
+        }
+        .options-container {
+            margin-bottom: 24px;
+            background: #e9ecef;
+            border-radius: 8px;
+            overflow: hidden;
+        }
+        .options-container summary {
+            padding: 12px 16px;
+            font-weight: 500;
+            cursor: pointer;
+            background: #dee2e6;
+            user-select: none;
+        }
+        .options-container summary:hover {
+            background: #ced4da;
+        }
+        .options-content {
+            padding: 16px;
+            display: flex;
+            flex-wrap: wrap;
+            gap: 24px;
+        }
+        .filter-section {
+            flex: 1;
+            min-width: 300px;
+        }
+        .filter-section h3 {
+            margin-top: 0;
+            margin-bottom: 12px;
+            font-size: 18px;
+            font-weight: 500;
+            text-align: left;
+        }
+        #suite-filters {
+            display: flex;
+            flex-wrap: wrap;
+            gap: 8px;
+        }
+        .display-options {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+        .display-options label {
+            display: flex;
+            align-items: center;
+            cursor: pointer;
+        }
+        .display-options input {
+            margin-right: 8px;
+        }
+    </style>
 </head>
 <body>
     <div class="container">
@@ -182,9 +273,6 @@ <h1>Benchmark Results</h1>
         <div class="filter-container">
             <input type="text" id="bench-filter" placeholder="Regex...">
         </div>
-        <div class="suite-filter-container" id="suite-filters">
-            <!-- Suite checkboxes will be generated by JavaScript -->
-        </div>
         <div class="run-selector">
             <select id="run-select">
                 <option value="">Select a run to compare...</option>
@@ -192,6 +280,31 @@ <h1>Benchmark Results</h1>
             <button onclick="addSelectedRun()">Add</button>
             <div id="selected-runs" class="selected-runs"></div>
         </div>
+        <details class="options-container">
+            <summary>Options</summary>
+            <div class="options-content">
+                <div class="filter-section">
+                    <h3>Suites</h3>
+                    <div id="suite-filters">
+                        <!-- Suite checkboxes will be generated by JavaScript -->
+                    </div>
+                </div>
+
+                <div class="filter-section">
+                    <h3>Display Options</h3>
+                    <div class="display-options">
+                        <label>
+                            <input type="checkbox" id="show-notes" checked>
+                            Director's commentary
+                        </label>
+                        <label>
+                            <input type="checkbox" id="show-unstable">
+                            Show 'it works on my machine' scenarios
+                        </label>
+                    </div>
+                </div>
+            </div>
+        </details>
         <details class="timeseries" open>
             <summary>Historical Results</summary>
             <div class="charts"></div>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 2bd52a70b07c8..ed7e361e14275 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -12,6 +12,10 @@ let timeseriesData, barChartsData, allRunNames;
 // DOM Elements
 let runSelect, selectedRunsDiv, suiteFiltersContainer;
 
+// Visibility toggles for benchmark notes and "unstable" warnings
+let showNotes = true;
+let showUnstable = false;
+
 // Run selector functions
 function updateSelectedRuns(forceUpdate = true) {
     selectedRunsDiv.innerHTML = '';
@@ -85,7 +89,8 @@ function createChart(data, containerId, type) {
                 title: {
                     display: true,
                     text: data.unit
-                }
+                },
+                grace: '20%',
             }
         }
     };
@@ -178,7 +183,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
     // Create timeseries charts
     filteredTimeseriesData.forEach((data, index) => {
         const containerId = `timeseries-${index}`;
-        const container = createChartContainer(data, containerId);
+        const container = createChartContainer(data, containerId, 'benchmark');
         document.querySelector('.timeseries .charts').appendChild(container);
         createChart(data, containerId, 'time');
     });
@@ -186,7 +191,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
     // Create bar charts
     filteredBarChartsData.forEach((data, index) => {
         const containerId = `barchart-${index}`;
-        const container = createChartContainer(data, containerId);
+        const container = createChartContainer(data, containerId, 'group');
         document.querySelector('.bar-charts .charts').appendChild(container);
         createChart(data, containerId, 'bar');
     });
@@ -195,11 +200,41 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
     filterCharts();
 }
 
-function createChartContainer(data, canvasId) {
+function createChartContainer(data, canvasId, type) {
     const container = document.createElement('div');
     container.className = 'chart-container';
     container.setAttribute('data-label', data.label);
     container.setAttribute('data-suite', data.suite);
+    
+    // Check if this benchmark is marked as unstable
+    const metadata = metadataForLabel(data.label, type);
+    if (metadata && metadata.unstable) {
+        container.setAttribute('data-unstable', 'true');
+        
+        // Add unstable warning
+        const unstableWarning = document.createElement('div');
+        unstableWarning.className = 'benchmark-unstable';
+        unstableWarning.textContent = metadata.unstable;
+        unstableWarning.style.display = showUnstable ? 'block' : 'none';
+        container.appendChild(unstableWarning);
+    }
+    
+    // Add notes if present
+    if (metadata && metadata.notes) {
+        const noteElement = document.createElement('div');
+        noteElement.className = 'benchmark-note';
+        noteElement.textContent = metadata.notes;
+        noteElement.style.display = showNotes ? 'block' : 'none';
+        container.appendChild(noteElement);
+    }
+    
+    // Add description if present in metadata, but only for groups
+    if (metadata && metadata.description && metadata.type === "group") {
+        const descElement = document.createElement('div');
+        descElement.className = 'benchmark-description';
+        descElement.textContent = metadata.description;
+        container.appendChild(descElement);
+    }
 
     const canvas = document.createElement('canvas');
     canvas.id = canvasId;
@@ -221,11 +256,10 @@ function createChartContainer(data, canvasId) {
     summary.appendChild(downloadButton);
     details.appendChild(summary);
 
-    latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
-
     // Create and append extra info
     const extraInfo = document.createElement('div');
     extraInfo.className = 'extra-info';
+    latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
     extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
     details.appendChild(extraInfo);
 
@@ -234,6 +268,16 @@ function createChartContainer(data, canvasId) {
     return container;
 }
 
+function metadataForLabel(label, type) {
+    for (const [key, metadata] of Object.entries(benchmarkMetadata)) {
+        if (metadata.type === type && label.startsWith(key)) {
+            return metadata;
+        }
+    }
+    
+    return null;
+}
+
 // Pre-compute a lookup for the latest run per label
 function createLatestRunsLookup(benchmarkRuns) {
     const latestRunsMap = new Map();
@@ -259,17 +303,31 @@ function generateExtraInfo(latestRunsLookup, data) {
     const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
 
     return labels.map(label => {
+        const metadata = metadataForLabel(label);
         const latestRun = latestRunsLookup.get(label);
-
-        if (latestRun) {
-            return `<div class="extra-info-entry">
-                        <strong>${label}:</strong> ${formatCommand(latestRun.result)}<br>
-                        <em>Description:</em> ${latestRun.result.description}
-                    </div>`;
+        
+        let html = '<div class="extra-info-entry">';
+        
+        if (metadata) {
+            html += `<strong>${label}:</strong> ${formatCommand(latestRun.result)}<br>`;
+            
+            if (metadata.description) {
+                html += `<em>Description:</em> ${metadata.description}`;
+            }
+            
+            if (metadata.notes) {
+                html += `<br><em>Notes:</em> <span class="note-text">${metadata.notes}</span>`;
+            }
+            
+            if (metadata.unstable) {
+                html += `<br><em class="unstable-warning">⚠️ Unstable:</em> <span class="unstable-text">${metadata.unstable}</span>`;
+            }
+        } else {
+            html += `<strong>${label}:</strong> No data available`;
         }
-        return `<div class="extra-info-entry">
-                        <strong>${label}:</strong> No data available
-                </div>`;
+        
+        html += '</div>';
+        return html;
     }).join('');
 }
 
@@ -331,6 +389,10 @@ function updateURL() {
         url.searchParams.delete('runs');
     }
 
+    // Add toggle states to URL
+    url.searchParams.set('notes', showNotes);
+    url.searchParams.set('unstable', showUnstable);
+
     history.replaceState(null, '', url);
 }
 
@@ -342,7 +404,19 @@ function filterCharts() {
     document.querySelectorAll('.chart-container').forEach(container => {
         const label = container.getAttribute('data-label');
         const suite = container.getAttribute('data-suite');
-        container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none';
+        const isUnstable = container.getAttribute('data-unstable') === 'true';
+
+        // Hide unstable benchmarks if showUnstable is false
+        const shouldShow = regex.test(label) && 
+                          activeSuites.includes(suite) && 
+                          (showUnstable || !isUnstable);
+
+        container.style.display = shouldShow ? '' : 'none';
+    });
+
+    // Update notes visibility
+    document.querySelectorAll('.benchmark-note').forEach(note => {
+        note.style.display = showNotes ? 'block' : 'none';
     });
 
     updateURL();
@@ -395,13 +469,20 @@ function processBarChartsData(benchmarkRuns) {
             if (!result.explicit_group) return;
 
             if (!groupedResults[result.explicit_group]) {
+                // Look up group metadata
+                const groupMetadata = metadataForLabel(result.explicit_group);
+                
                 groupedResults[result.explicit_group] = {
                     label: result.explicit_group,
                     suite: result.suite,
                     unit: result.unit,
                     lower_is_better: result.lower_is_better,
                     labels: [],
-                    datasets: []
+                    datasets: [],
+                    // Add metadata if available
+                    description: groupMetadata?.description || null,
+                    notes: groupMetadata?.notes || null,
+                    unstable: groupMetadata?.unstable || null
                 };
             }
 
@@ -466,6 +547,43 @@ function setupSuiteFilters() {
     });
 }
 
+function setupToggles() {
+    const notesToggle = document.getElementById('show-notes');
+    const unstableToggle = document.getElementById('show-unstable');
+    
+    notesToggle.addEventListener('change', function() {
+        showNotes = this.checked;
+        // Update all note elements visibility
+        document.querySelectorAll('.benchmark-note').forEach(note => {
+            note.style.display = showNotes ? 'block' : 'none';
+        });
+        filterCharts();
+    });
+    
+    unstableToggle.addEventListener('change', function() {
+        showUnstable = this.checked;
+        // Update all unstable warning elements visibility
+        document.querySelectorAll('.benchmark-unstable').forEach(warning => {
+            warning.style.display = showUnstable ? 'block' : 'none';
+        });
+        filterCharts();
+    });
+    
+    // Initialize from URL params if present
+    const notesParam = getQueryParam('notes');
+    const unstableParam = getQueryParam('unstable');
+    
+    if (notesParam !== null) {
+        showNotes = notesParam === 'true';
+        notesToggle.checked = showNotes;
+    }
+    
+    if (unstableParam !== null) {
+        showUnstable = unstableParam === 'true';
+        unstableToggle.checked = showUnstable;
+    }
+}
+
 function initializeCharts() {
     // Process raw data
     timeseriesData = processTimeseriesData(benchmarkRuns);
@@ -502,6 +620,7 @@ function initializeCharts() {
     // Setup UI components
     setupRunSelector();
     setupSuiteFilters();
+    setupToggles();
 
     // Apply URL parameters
     const regexParam = getQueryParam('regex');
@@ -542,7 +661,8 @@ function loadData() {
         fetch(remoteDataUrl)
             .then(response => response.json())
             .then(data => {
-                benchmarkRuns = data;
+                benchmarkRuns = data.runs || data;
+                benchmarkMetadata = data.metadata || benchmarkMetadata || {};
                 initializeCharts();
             })
             .catch(error => {
@@ -553,7 +673,7 @@ function loadData() {
                 loadingIndicator.style.display = 'none'; // Hide loading indicator
             });
     } else {
-        // Use local data
+        // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js)
         initializeCharts();
         loadingIndicator.style.display = 'none'; // Hide loading indicator
     }
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 1d7304ea5e212..8db0549a861a4 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -137,6 +137,18 @@ def process_results(
     return valid_results, processed
 
 
+def collect_metadata(suites):
+    metadata = {}
+
+    for s in suites:
+        metadata.update(s.additionalMetadata())
+        suite_benchmarks = s.benchmarks()
+        for benchmark in suite_benchmarks:
+            metadata[benchmark.name()] = benchmark.get_metadata()
+
+    return metadata
+
+
 def main(directory, additional_env_vars, save_name, compare_names, filter):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
 
@@ -160,6 +172,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         else []
     )
 
+    # Collect metadata from all benchmarks without setting them up
+    metadata = collect_metadata(suites)
+
+    # If dry run, we're done
+    if options.dry_run:
+        suites = []
+
     benchmarks = []
     failures = {}
 
@@ -290,7 +309,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         html_path = options.output_directory
         if options.output_directory is None:
             html_path = os.path.join(os.path.dirname(__file__), "html")
-        generate_html(history.runs, compare_names, html_path)
+        generate_html(history.runs, compare_names, html_path, metadata)
 
 
 def validate_and_parse_env_args(env_args):
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 49b4d1d84a214..b71f87371b383 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -6,10 +6,17 @@
 import json
 import os
 from options import options
+from utils.result import BenchmarkMetadata
 
 
-def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str):
+def generate_html(
+    benchmark_runs: list,
+    compare_names: list[str],
+    html_path: str,
+    metadata: dict[str, BenchmarkMetadata],
+):
     benchmark_runs.sort(key=lambda run: run.date, reverse=True)
+    serializable_metadata = {k: v.__dict__ for k, v in metadata.items()}
 
     if options.output_html == "local":
         data_path = os.path.join(html_path, "data.js")
@@ -26,6 +33,11 @@ def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str
 
             f.write("\n];\n\n")  # terminates benchmarkRuns
 
+            f.write("benchmarkMetadata = ")
+            json.dump(serializable_metadata, f)
+
+            f.write(";\n\n")  # terminates benchmarkMetadata
+
             f.write("defaultCompareNames = ")
             json.dump(compare_names, f)
             f.write(";\n")  # terminates defaultCompareNames
@@ -34,12 +46,8 @@ def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str
     else:
         data_path = os.path.join(html_path, "data.json")
         with open(data_path, "w") as f:
-            f.write("[\n")
-            for i, run in enumerate(benchmark_runs):
-                if i > 0:
-                    f.write(",\n")
-                f.write(run.to_json())
-            f.write("\n]\n")
+            json_data = {"runs": benchmark_runs, "metadata": serializable_metadata}
+            json.dump(json_data, f, indent=2)
 
         print(
             f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 7d82d9e488edf..11d837068b887 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -42,3 +42,12 @@ class BenchmarkRun:
         default=None,
         metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
     )
+
+
+@dataclass_json
+@dataclass
+class BenchmarkMetadata:
+    type: str = 'benchmark' # or 'group'
+    description: Optional[str] = None
+    notes: Optional[str] = None
+    unstable: Optional[str] = None

From 30cd308f4faec6e884234fec382c0580cec8f9ca Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 12:23:37 +0100
Subject: [PATCH 42/79] apply formatting

---
 devops/scripts/benchmarks/benches/base.py    |  2 +-
 devops/scripts/benchmarks/benches/compute.py | 11 +++---
 devops/scripts/benchmarks/benches/test.py    |  8 ++--
 devops/scripts/benchmarks/html/scripts.js    | 40 ++++++++++----------
 devops/scripts/benchmarks/utils/result.py    |  2 +-
 5 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 1135a267864a9..8403097eca168 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -110,7 +110,7 @@ def unstable(self) -> str:
 
     def get_metadata(self) -> BenchmarkMetadata:
         return BenchmarkMetadata(
-            type='benchmark',
+            type="benchmark",
             description=self.description(),
             notes=self.notes(),
             unstable=self.unstable(),
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 67ec0bf2087ff..9386f4d2b1b35 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -56,21 +56,20 @@ def setup(self):
 
     def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
         return {
-            "SubmitKernel" : BenchmarkMetadata(
+            "SubmitKernel": BenchmarkMetadata(
                 type="group",
                 description="Measures CPU time overhead of submitting kernels through different APIs.",
                 notes="Each layer builds on top of the previous layer, adding functionality and overhead. "
-                      "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. "
-                      "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance."
-                      "Work is ongoing to reduce the overhead of the SYCL API",
+                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. "
+                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance."
+                "Work is ongoing to reduce the overhead of the SYCL API",
             ),
-            "SinKernelGraph" : BenchmarkMetadata(
+            "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
             ),
         }
 
-
     def benchmarks(self) -> list[Benchmark]:
         if options.sycl is None:
             return []
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index e7451e24f25cf..3802597f5c48a 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -39,18 +39,20 @@ def benchmarks(self) -> list[Benchmark]:
                 value = base_value * value_multiplier
                 diff = base_diff * value_multiplier
 
-                result.append(TestBench(self, name, value, diff, group, notes, unstable))
+                result.append(
+                    TestBench(self, name, value, diff, group, notes, unstable)
+                )
 
         return result
 
     def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
         return {
-            "Foo Group" : BenchmarkMetadata(
+            "Foo Group": BenchmarkMetadata(
                 type="group",
                 description="This is a test benchmark for Foo Group.",
                 notes="This is a test note for Foo Group.",
             ),
-            "Bar Group" : BenchmarkMetadata(
+            "Bar Group": BenchmarkMetadata(
                 type="group",
                 description="This is a test benchmark for Bar Group.",
                 unstable="This is an unstable note for Bar Group.",
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index ed7e361e14275..4136bb647b079 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -205,12 +205,12 @@ function createChartContainer(data, canvasId, type) {
     container.className = 'chart-container';
     container.setAttribute('data-label', data.label);
     container.setAttribute('data-suite', data.suite);
-    
+
     // Check if this benchmark is marked as unstable
     const metadata = metadataForLabel(data.label, type);
     if (metadata && metadata.unstable) {
         container.setAttribute('data-unstable', 'true');
-        
+
         // Add unstable warning
         const unstableWarning = document.createElement('div');
         unstableWarning.className = 'benchmark-unstable';
@@ -218,7 +218,7 @@ function createChartContainer(data, canvasId, type) {
         unstableWarning.style.display = showUnstable ? 'block' : 'none';
         container.appendChild(unstableWarning);
     }
-    
+
     // Add notes if present
     if (metadata && metadata.notes) {
         const noteElement = document.createElement('div');
@@ -227,7 +227,7 @@ function createChartContainer(data, canvasId, type) {
         noteElement.style.display = showNotes ? 'block' : 'none';
         container.appendChild(noteElement);
     }
-    
+
     // Add description if present in metadata, but only for groups
     if (metadata && metadata.description && metadata.type === "group") {
         const descElement = document.createElement('div');
@@ -274,7 +274,7 @@ function metadataForLabel(label, type) {
             return metadata;
         }
     }
-    
+
     return null;
 }
 
@@ -305,27 +305,27 @@ function generateExtraInfo(latestRunsLookup, data) {
     return labels.map(label => {
         const metadata = metadataForLabel(label);
         const latestRun = latestRunsLookup.get(label);
-        
+
         let html = '<div class="extra-info-entry">';
-        
+
         if (metadata) {
             html += `<strong>${label}:</strong> ${formatCommand(latestRun.result)}<br>`;
-            
+
             if (metadata.description) {
                 html += `<em>Description:</em> ${metadata.description}`;
             }
-            
+
             if (metadata.notes) {
                 html += `<br><em>Notes:</em> <span class="note-text">${metadata.notes}</span>`;
             }
-            
+
             if (metadata.unstable) {
                 html += `<br><em class="unstable-warning">⚠️ Unstable:</em> <span class="unstable-text">${metadata.unstable}</span>`;
             }
         } else {
             html += `<strong>${label}:</strong> No data available`;
         }
-        
+
         html += '</div>';
         return html;
     }).join('');
@@ -407,9 +407,9 @@ function filterCharts() {
         const isUnstable = container.getAttribute('data-unstable') === 'true';
 
         // Hide unstable benchmarks if showUnstable is false
-        const shouldShow = regex.test(label) && 
-                          activeSuites.includes(suite) && 
-                          (showUnstable || !isUnstable);
+        const shouldShow = regex.test(label) &&
+            activeSuites.includes(suite) &&
+            (showUnstable || !isUnstable);
 
         container.style.display = shouldShow ? '' : 'none';
     });
@@ -471,7 +471,7 @@ function processBarChartsData(benchmarkRuns) {
             if (!groupedResults[result.explicit_group]) {
                 // Look up group metadata
                 const groupMetadata = metadataForLabel(result.explicit_group);
-                
+
                 groupedResults[result.explicit_group] = {
                     label: result.explicit_group,
                     suite: result.suite,
@@ -550,7 +550,7 @@ function setupSuiteFilters() {
 function setupToggles() {
     const notesToggle = document.getElementById('show-notes');
     const unstableToggle = document.getElementById('show-unstable');
-    
+
     notesToggle.addEventListener('change', function() {
         showNotes = this.checked;
         // Update all note elements visibility
@@ -559,7 +559,7 @@ function setupToggles() {
         });
         filterCharts();
     });
-    
+
     unstableToggle.addEventListener('change', function() {
         showUnstable = this.checked;
         // Update all unstable warning elements visibility
@@ -568,16 +568,16 @@ function setupToggles() {
         });
         filterCharts();
     });
-    
+
     // Initialize from URL params if present
     const notesParam = getQueryParam('notes');
     const unstableParam = getQueryParam('unstable');
-    
+
     if (notesParam !== null) {
         showNotes = notesParam === 'true';
         notesToggle.checked = showNotes;
     }
-    
+
     if (unstableParam !== null) {
         showUnstable = unstableParam === 'true';
         unstableToggle.checked = showUnstable;
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 11d837068b887..b29d973602a35 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -47,7 +47,7 @@ class BenchmarkRun:
 @dataclass_json
 @dataclass
 class BenchmarkMetadata:
-    type: str = 'benchmark' # or 'group'
+    type: str = "benchmark"  # or 'group'
     description: Optional[str] = None
     notes: Optional[str] = None
     unstable: Optional[str] = None

From 5e0539a50fd9835b99391a6bc91e833604cf40ea Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 12:33:26 +0100
Subject: [PATCH 43/79] fix multiple descriptions/notes

---
 devops/scripts/benchmarks/benches/compute.py | 8 ++++----
 devops/scripts/benchmarks/benches/test.py    | 3 ++-
 devops/scripts/benchmarks/html/index.html    | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 9386f4d2b1b35..f69df1966d690 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -59,10 +59,10 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
             "SubmitKernel": BenchmarkMetadata(
                 type="group",
                 description="Measures CPU time overhead of submitting kernels through different APIs.",
-                notes="Each layer builds on top of the previous layer, adding functionality and overhead. "
-                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. "
-                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance."
-                "Work is ongoing to reduce the overhead of the SYCL API",
+                notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
+                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
+                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
+                "Work is ongoing to reduce the overhead of the SYCL API\n",
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 3802597f5c48a..0e4ee55286fb0 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -50,7 +50,8 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
             "Foo Group": BenchmarkMetadata(
                 type="group",
                 description="This is a test benchmark for Foo Group.",
-                notes="This is a test note for Foo Group.",
+                notes="This is a test note for Foo Group.\n"
+                      "Look, multiple lines!",
             ),
             "Bar Group": BenchmarkMetadata(
                 type="group",
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index c40174b7f35a0..446b103029c80 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -192,6 +192,7 @@
             margin-bottom: 10px;
             border-radius: 5px;
             border-left: 4px solid #084298;
+            white-space: pre-line;
         }
         .benchmark-unstable {
             background-color: #f8d7da;
@@ -200,6 +201,7 @@
             margin-bottom: 10px;
             border-radius: 5px;
             border-left: 4px solid #842029;
+            white-space: pre-line;
         }
         .note-text {
             color: #084298;

From 137407a3e41f8764e51a42e88d16c7f6b6abcb79 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 12:38:20 +0100
Subject: [PATCH 44/79] fix benchmark descriptions

---
 devops/scripts/benchmarks/html/index.html | 10 ++++++++++
 devops/scripts/benchmarks/html/scripts.js |  6 +++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index 446b103029c80..32a00ab67bb47 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -264,6 +264,16 @@
         .display-options input {
             margin-right: 8px;
         }
+        .benchmark-description {
+            background-color: #f2f2f2;
+            color: #333;
+            padding: 10px;
+            margin-bottom: 10px;
+            border-radius: 5px;
+            border-left: 4px solid #6c757d;
+            white-space: pre-line;
+            font-style: italic;
+        }
     </style>
 </head>
 <body>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 4136bb647b079..47d23ff8f6e9d 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -260,7 +260,7 @@ function createChartContainer(data, canvasId, type) {
     const extraInfo = document.createElement('div');
     extraInfo.className = 'extra-info';
     latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
-    extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
+    extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data, 'benchmark');
     details.appendChild(extraInfo);
 
     container.appendChild(details);
@@ -299,11 +299,11 @@ function createLatestRunsLookup(benchmarkRuns) {
     return latestRunsMap;
 }
 
-function generateExtraInfo(latestRunsLookup, data) {
+function generateExtraInfo(latestRunsLookup, data, type) {
     const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
 
     return labels.map(label => {
-        const metadata = metadataForLabel(label);
+        const metadata = metadataForLabel(label, type);
         const latestRun = latestRunsLookup.get(label);
 
         let html = '<div class="extra-info-entry">';

From e0f5ca61518604940f08ad0eb7f21ed5b42aa945 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 12:46:57 +0100
Subject: [PATCH 45/79] fix remote html output

---
 devops/scripts/benchmarks/benches/test.py |  3 +-
 devops/scripts/benchmarks/output_html.py  | 36 +++++++++++------------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 0e4ee55286fb0..4862bc64ecbaf 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -50,8 +50,7 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
             "Foo Group": BenchmarkMetadata(
                 type="group",
                 description="This is a test benchmark for Foo Group.",
-                notes="This is a test note for Foo Group.\n"
-                      "Look, multiple lines!",
+                notes="This is a test note for Foo Group.\n" "Look, multiple lines!",
             ),
             "Bar Group": BenchmarkMetadata(
                 type="group",
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index b71f87371b383..40a3f914e5115 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -18,36 +18,36 @@ def generate_html(
     benchmark_runs.sort(key=lambda run: run.date, reverse=True)
     serializable_metadata = {k: v.__dict__ for k, v in metadata.items()}
 
+    serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs]
+
+    data = {
+        "runs": serializable_runs,
+        "metadata": serializable_metadata,
+        "defaultCompareNames": compare_names,
+    }
+
     if options.output_html == "local":
         data_path = os.path.join(html_path, "data.js")
-        # Write data to js file
-        # We can't store this as a standalone json file because it needs to be inline in the html
         with open(data_path, "w") as f:
-            f.write("benchmarkRuns = [\n")
-            # it might be tempting to just to create a list and convert
-            # that to a json, but that leads to json being serialized twice.
-            for i, run in enumerate(benchmark_runs):
-                if i > 0:
-                    f.write(",\n")
-                f.write(run.to_json())
-
-            f.write("\n];\n\n")  # terminates benchmarkRuns
+            # For local format, we need to write JavaScript variable assignments
+            f.write("benchmarkRuns = ")
+            json.dump(data["runs"], f, indent=2)
+            f.write(";\n\n")
 
             f.write("benchmarkMetadata = ")
-            json.dump(serializable_metadata, f)
-
-            f.write(";\n\n")  # terminates benchmarkMetadata
+            json.dump(data["metadata"], f, indent=2)
+            f.write(";\n\n")
 
             f.write("defaultCompareNames = ")
-            json.dump(compare_names, f)
-            f.write(";\n")  # terminates defaultCompareNames
+            json.dump(data["defaultCompareNames"], f, indent=2)
+            f.write(";\n")
 
         print(f"See {os.getcwd()}/html/index.html for the results.")
     else:
+        # For remote format, we write a single JSON file
         data_path = os.path.join(html_path, "data.json")
         with open(data_path, "w") as f:
-            json_data = {"runs": benchmark_runs, "metadata": serializable_metadata}
-            json.dump(json_data, f, indent=2)
+            json.dump(data, f, indent=2)
 
         print(
             f"Upload {data_path} to a location set in config.js remoteDataUrl argument."

From 1041db695a7da031879bea08f2b2b0b0c9e76151 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 12:55:39 +0100
Subject: [PATCH 46/79] fix metadata collection with dry run

---
 devops/scripts/benchmarks/main.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 8db0549a861a4..e701b9eac70a2 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -159,18 +159,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         options.extra_ld_libraries.extend(cr.ld_libraries())
         options.extra_env_vars.update(cr.env_vars())
 
-    suites = (
-        [
-            ComputeBench(directory),
-            VelocityBench(directory),
-            SyclBench(directory),
-            LlamaCppBench(directory),
-            UMFSuite(directory),
-            TestSuite(),
-        ]
-        if not options.dry_run
-        else []
-    )
+    suites = [
+        ComputeBench(directory),
+        VelocityBench(directory),
+        SyclBench(directory),
+        LlamaCppBench(directory),
+        UMFSuite(directory),
+        TestSuite(),
+    ]
 
     # Collect metadata from all benchmarks without setting them up
     metadata = collect_metadata(suites)

From fae04f46984b39925c1724b0e3c7125490d4ab7b Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Fri, 14 Mar 2025 13:30:46 +0100
Subject: [PATCH 47/79] cleanup compute bench, fix readme, use newer sycl-bench

---
 devops/scripts/benchmarks/README.md           |   8 +-
 devops/scripts/benchmarks/benches/base.py     |  13 +-
 devops/scripts/benchmarks/benches/compute.py  | 201 ++++++++----------
 .../scripts/benchmarks/benches/syclbench.py   |   4 +-
 devops/scripts/benchmarks/main.py             |   2 +-
 5 files changed, 99 insertions(+), 129 deletions(-)

diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
index 004fe14eca35b..fcadded3cad51 100644
--- a/devops/scripts/benchmarks/README.md
+++ b/devops/scripts/benchmarks/README.md
@@ -6,6 +6,8 @@ Scripts for running performance tests on SYCL and Unified Runtime.
 
 - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench)
 - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/)
+- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp)
+- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench)
 
 ## Running
 
@@ -27,8 +29,6 @@ You can also include additional benchmark parameters, such as environment variab
 
 Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request.
 
-By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data.
-
 You must be a member of the `oneapi-src` organization to access these features.
 
 ## Comparing results
@@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option
 
 You can compare benchmark results using the `--compare` option. The comparison will be presented in a markdown output file (see below). If you want to calculate the relative performance of the new results against previously saved data, use `--compare <previously_saved_data>` (i.e. `--compare baseline`). To compare only stored data without generating new results, use `--dry-run --compare <name1> --compare <name2> --relative-perf <name1>`, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script from running benchmarks. Listing more than two `--compare` options results in displaying only execution time, without statistical analysis.
 
-Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results
-are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).
+Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2), is updated automatically during a nightly job. The results
+are stored [here](https://oneapi-src.github.io/unified-runtime/performance/).
 
 ## Output formats
 You can display the results in the form of an HTML file by using `--output-html` and a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`.
diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 8403097eca168..1bc99b11518e3 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -75,12 +75,6 @@ def download(
         self.data_path = self.create_data_path(name, skip_data_dir)
         return download(self.data_path, url, file, untar, unzip, checksum)
 
-    def name(self):
-        raise NotImplementedError()
-
-    def description(self):
-        return "No description provided."
-
     def lower_is_better(self):
         return True
 
@@ -99,8 +93,11 @@ def stddev_threshold(self):
     def get_suite_name(self) -> str:
         return self.suite.name()
 
-    def result_names(self) -> list[str]:
-        return [self.name()]
+    def name(self):
+        raise NotImplementedError()
+
+    def description(self):
+        return "No description provided."
 
     def notes(self) -> str:
         return None
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index f69df1966d690..c26f645635d27 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -13,6 +13,20 @@
 from enum import Enum
 
 
+class RUNTIMES(Enum):
+    SYCL = "sycl"
+    LEVEL_ZERO = "l0"
+    UR = "ur"
+
+
+def runtime_to_name(runtime: RUNTIMES) -> str:
+    return {
+        RUNTIMES.SYCL: "SYCL",
+        RUNTIMES.LEVEL_ZERO: "Level Zero",
+        RUNTIMES.UR: "Unified Runtime",
+    }[runtime]
+
+
 class ComputeBench(Suite):
     def __init__(self, directory):
         self.directory = directory
@@ -70,6 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
             ),
         }
 
+    def enabled_runtimes(self, supported_runtimes=None):
+        # all runtimes in the RUNTIMES enum
+        runtimes = supported_runtimes or list(RUNTIMES)
+
+        # Filter out UR if not available
+        if options.ur is None:
+            runtimes = [r for r in runtimes if r != RUNTIMES.UR]
+
+        return runtimes
+
     def benchmarks(self) -> list[Benchmark]:
         if options.sycl is None:
             return []
@@ -77,11 +101,46 @@ def benchmarks(self) -> list[Benchmark]:
         if options.ur_adapter == "cuda":
             return []
 
-        benches = [
-            SubmitKernelL0(self, 0),
-            SubmitKernelL0(self, 1),
-            SubmitKernelSYCL(self, 0),
-            SubmitKernelSYCL(self, 1),
+        benches = []
+
+        # Add SubmitKernel benchmarks using loops
+        for runtime in self.enabled_runtimes():
+            for in_order_queue in [0, 1]:
+                for measure_completion in [0, 1]:
+                    benches.append(
+                        SubmitKernel(self, runtime, in_order_queue, measure_completion)
+                    )
+
+        # Add SinKernelGraph benchmarks
+        for runtime in self.enabled_runtimes():
+            for with_graphs in [0, 1]:
+                for num_kernels in [5, 100]:
+                    benches.append(
+                        GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
+                    )
+
+        # Add ULLS benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]):
+            benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
+            benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))
+
+        # Add GraphApiSubmitGraph benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL]):
+            for in_order_queue in [0, 1]:
+                for num_kernels in [4, 10, 32]:
+                    for measure_completion_time in [0, 1]:
+                        benches.append(
+                            GraphApiSubmitGraph(
+                                self,
+                                runtime,
+                                in_order_queue,
+                                num_kernels,
+                                measure_completion_time,
+                            )
+                        )
+
+        # Add other benchmarks
+        benches += [
             QueueInOrderMemcpy(self, 0, "Device", "Device", 1024),
             QueueInOrderMemcpy(self, 0, "Host", "Device", 1024),
             QueueMemcpy(self, "Device", "Device", 1024),
@@ -89,45 +148,14 @@ def benchmarks(self) -> list[Benchmark]:
             ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
             ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
             VectorSum(self),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
-            UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256),
-            UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256),
-            UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1),
-            UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1),
         ]
 
-        for in_order_queue in [0, 1]:
-            for num_kernels in [4, 32]:
-                for measure_completion_time in [0, 1]:
-                    benches.append(
-                        GraphApiSubmitGraph(
-                            self,
-                            RUNTIMES.SYCL,
-                            in_order_queue,
-                            num_kernels,
-                            measure_completion_time,
-                        )
-                    )
-
+        # Add UR-specific benchmarks
         if options.ur is not None:
             benches += [
-                SubmitKernelUR(self, 0, 0),
-                SubmitKernelUR(self, 1, 0),
-                SubmitKernelUR(self, 1, 1),
                 MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
                 MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
                 MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100),
             ]
 
         return benches
@@ -228,98 +256,49 @@ def teardown(self):
         return
 
 
-class SubmitKernelSYCL(ComputeBenchmark):
-    def __init__(self, bench, ioq):
+class SubmitKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
         self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel")
+        self.runtime = runtime
+        self.measure_completion = measure_completion
+        super().__init__(
+            bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
+        )
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_sycl SubmitKernel {order}"
+        completion_str = " with measure completion" if self.measure_completion else ""
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}"
 
     def explicit_group(self):
-        return "SubmitKernel"
-
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            "--MeasureCompletion=0",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
-
-    def description(self) -> str:
-        order = "in-order" if self.ioq else "out-of-order"
         return (
-            f"Measures CPU time overhead of submitting {order} kernels through SYCL API."
-            "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
-        )
-
-
-class SubmitKernelUR(ComputeBenchmark):
-    def __init__(self, bench, ioq, measureCompletion):
-        self.ioq = ioq
-        self.measureCompletion = measureCompletion
-        super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")
-
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_ur SubmitKernel {order}" + (
-            " with measure completion" if self.measureCompletion else ""
+            "SubmitKernel"
+            if self.measure_completion == 0
+            else "SubmitKernel With Completion"
         )
 
-    def explicit_group(self):
-        return "SubmitKernel"
-
     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
-        completion = "including" if self.measureCompletion else "excluding"
-        return (
-            f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, "
-            f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time "
-            f"to isolate API overhead."
-        )
+        runtime_name = runtime_to_name(self.runtime)
 
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            f"--MeasureCompletion={self.measureCompletion}",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
-
-
-class SubmitKernelL0(ComputeBenchmark):
-    def __init__(self, bench, ioq):
-        self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel")
-
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_l0 SubmitKernel {order}"
+        completion_desc = ""
+        if self.runtime == RUNTIMES.UR:
+            completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time"
 
-    def explicit_group(self):
-        return "SubmitKernel"
+        l0_specific = ""
+        if self.runtime == RUNTIMES.LEVEL_ZERO:
+            l0_specific = " Uses immediate command lists"
 
-    def description(self) -> str:
-        order = "in-order" if self.ioq else "out-of-order"
         return (
-            f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. "
-            f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead "
-            f"from execution time."
+            f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
+            f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}"
         )
 
     def bin_args(self) -> list[str]:
         return [
             f"--Ioq={self.ioq}",
             "--DiscardEvents=0",
-            "--MeasureCompletion=0",
+            f"--MeasureCompletion={self.measure_completion}",
             "--iterations=100000",
             "--Profiling=0",
             "--NumKernels=10",
@@ -521,12 +500,6 @@ def bin_args(self) -> list[str]:
         ]
 
 
-class RUNTIMES(Enum):
-    SYCL = "sycl"
-    LEVEL_ZERO = "l0"
-    UR = "ur"
-
-
 class GraphApiSinKernelGraph(ComputeBenchmark):
     def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
         self.withGraphs = withGraphs
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index cc2db0a2fcf7c..14c0104d0a08c 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -31,8 +31,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "sycl-bench-repo",
-            "https://github.com/mateuszpn/sycl-bench.git",
-            "1e6ab2cfd004a72c5336c26945965017e06eab71",
+            "https://github.com/unisa-hpc/sycl-bench.git",
+            "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b",
         )
 
         configure_command = [
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index e701b9eac70a2..d05575a5a06ca 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -333,7 +333,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument(
         "--adapter",
         type=str,
-        help="Options to build the Unified Runtime as part of the benchmark",
+        help="Unified Runtime adapter to use.",
         default="level_zero",
     )
     parser.add_argument(

From cfa4a9cbc5166db535b3754fa7023d01b2589594 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 14 Mar 2025 08:12:22 -0700
Subject: [PATCH 48/79] [CI] configure upload results

---
 .github/workflows/benchmark.yml                  | 2 ++
 .github/workflows/sycl-linux-run-tests.yml       | 6 ++++++
 devops/actions/run-tests/benchmark_v2/action.yml | 5 ++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index f044cbb066757..ca0364f94fde5 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -95,6 +95,7 @@ jobs:
       target_devices: ${{ matrix.backend }}
       reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
       tests_selector: benchmark_v2
+      benchmark_upload_results: ${{ inputs.upload_results }}
       repo_ref: ${{ matrix.ref }}
       devops_ref: ${{ github.ref }}
       sycl_toolchain_artifact: sycl_linux_default
@@ -122,4 +123,5 @@ jobs:
       target_devices: ${{ matrix.backend }}
       reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
       tests_selector: benchmark_v2
+      benchmark_upload_results: ${{ inputs.upload_results }}
       repo_ref: ${{ github.ref }}
diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index f5b243cb7fc05..cc0b5685afec2 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -114,6 +114,11 @@ on:
         default: ''
         required: False
 
+      benchmark_upload_results:
+        type: string
+        required: False
+        default: 'false'
+
   workflow_dispatch:
     inputs:
       runner:
@@ -337,6 +342,7 @@ jobs:
       uses: ./devops/actions/run-tests/benchmark_v2
       with:
         target_devices: ${{ inputs.target_devices }}
+        upload_results: ${{ inputs.benchmark_upload_results }}
       env:
         RUNNER_TAG: ${{ inputs.runner }}
         GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
\ No newline at end of file
diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml
index 375bc20faf857..e75f4b309499d 100644
--- a/devops/actions/run-tests/benchmark_v2/action.yml
+++ b/devops/actions/run-tests/benchmark_v2/action.yml
@@ -16,6 +16,9 @@ inputs:
   target_devices:
     type: string
     required: True
+  upload_results:
+    type: string
+    required: True
 
 runs:
   using: "composite"
@@ -99,7 +102,7 @@ runs:
       echo "-----"
       ls
   - name: Push compute-benchmarks results
-    if: always()
+    if: inputs.upload_results == 'true' && always()
     shell: bash
     run: |
       # TODO redo configuration

From ca963e6b9aaa91921e41ef2501891a0bec684ac2 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 14 Mar 2025 09:02:05 -0700
Subject: [PATCH 49/79] [CI] Change config to update during workflow run
 instead

---
 .github/workflows/sycl-docs.yml          | 5 +++++
 devops/scripts/benchmarks/html/config.js | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml
index a45c56bdd869c..6b748ec9c7ebb 100644
--- a/.github/workflows/sycl-docs.yml
+++ b/.github/workflows/sycl-docs.yml
@@ -51,6 +51,11 @@ jobs:
         mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/
         cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks
         touch .nojekyll
+        # Update benchmarking dashboard configuration
+        cat << 'EOF' > benchmarks/config.js
+        remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json';
+        defaultCompareNames = ["Baseline_PVC_L0"];
+        EOF
     # Upload the generated docs as an artifact and deploy to GitHub Pages.
     - name: Upload artifact
       uses: actions/upload-pages-artifact@v3
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
index 0a8551c5de152..3e67ae1dce8e5 100644
--- a/devops/scripts/benchmarks/html/config.js
+++ b/devops/scripts/benchmarks/html/config.js
@@ -1,2 +1,2 @@
-remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json';
+//remoteDataUrl = 'https://example.com/data.json';
 //defaultCompareNames = ['baseline'];

From 45a02e15ccb3cc01f408c41b3aa27c678c9a30c9 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 14 Mar 2025 09:28:09 -0700
Subject: [PATCH 50/79] [CI] Change save name depending on build

---
 .github/workflows/benchmark.yml               |  1 +
 .github/workflows/sycl-linux-run-tests.yml    |  7 +++-
 .../actions/run-tests/benchmark_v2/action.yml | 32 +++++++++----------
 3 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index ca0364f94fde5..ff2fddb2ae88d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -96,6 +96,7 @@ jobs:
       reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
       tests_selector: benchmark_v2
       benchmark_upload_results: ${{ inputs.upload_results }}
+      benchmark_build_hash: ${{ inputs.commit_hash }}
       repo_ref: ${{ matrix.ref }}
       devops_ref: ${{ github.ref }}
       sycl_toolchain_artifact: sycl_linux_default
diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index cc0b5685afec2..09821955a5b58 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -116,8 +116,12 @@ on:
 
       benchmark_upload_results:
         type: string
-        required: False
         default: 'false'
+        required: False
+      benchmark_build_hash:
+        type: string
+        default: ''
+        required: False
 
   workflow_dispatch:
     inputs:
@@ -343,6 +347,7 @@ jobs:
       with:
         target_devices: ${{ inputs.target_devices }}
         upload_results: ${{ inputs.benchmark_upload_results }}
+        build_hash: ${{ inputs.benchmark_build_hash }}
       env:
         RUNNER_TAG: ${{ inputs.runner }}
         GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
\ No newline at end of file
diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml
index e75f4b309499d..bab571ec16ff2 100644
--- a/devops/actions/run-tests/benchmark_v2/action.yml
+++ b/devops/actions/run-tests/benchmark_v2/action.yml
@@ -19,6 +19,10 @@ inputs:
   upload_results:
     type: string
     required: True
+  build_hash:
+    type: string
+    required: False
+    default: ''
 
 runs:
   using: "composite"
@@ -81,6 +85,8 @@ runs:
     run: |
       git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
   - name: Run compute-benchmarks
+    env:
+      BUILD_HASH: ${{ inputs.build_hash }}
     shell: bash
     run: |
       # TODO generate summary + display helpful message here
@@ -91,16 +97,22 @@ runs:
       pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
       echo "-----"
       mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
+
+      # TODO accommodate different GPUs and backends
+      SAVE_NAME="Baseline_PVC_L0"
+      if [ -n "$BUILD_HASH" ]; then
+          SAVE_NAME="Commit_PVC_$BUILD_HASH"
+      fi
+
       taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
         "$(realpath ./llvm_test_workdir)" \
         --sycl "$(realpath ./toolchain)" \
-        --save baseline \
+        --save "$SAVE_NAME" \
         --output-html remote \
         --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
         --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
         --preset Minimal
       echo "-----"
-      ls
   - name: Push compute-benchmarks results
     if: inputs.upload_results == 'true' && always()
     shell: bash
@@ -120,18 +132,4 @@ runs:
         git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
         git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
       fi
-#  - name: Find benchmark result artifact here
-#    if: always()
-#    shell: bash
-#    run: |
-#      cat << EOF
-#      #
-#      # Artifact link for benchmark results here:
-#      #
-#      EOF
-#  - name: Archive compute-benchmark results
-#    if: always()
-#    uses: actions/upload-artifact@v4
-#    with:
-#      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
-#      path: ./artifact
+

From 98f9d388393ec858c92dc72da7d0420362763562 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 14 Mar 2025 09:33:32 -0700
Subject: [PATCH 51/79] bump to 2024-2025

---
 devops/scripts/benchmarks/benches/base.py          | 2 +-
 devops/scripts/benchmarks/benches/llamacpp.py      | 2 +-
 devops/scripts/benchmarks/benches/syclbench.py     | 2 +-
 devops/scripts/benchmarks/benches/test.py          | 2 +-
 devops/scripts/benchmarks/benches/umf.py           | 2 +-
 devops/scripts/benchmarks/benches/velocity.py      | 2 +-
 devops/scripts/benchmarks/history.py               | 2 +-
 devops/scripts/benchmarks/html/index.html          | 2 +-
 devops/scripts/benchmarks/html/scripts.js          | 2 +-
 devops/scripts/benchmarks/main.py                  | 2 +-
 devops/scripts/benchmarks/output_html.py           | 2 +-
 devops/scripts/benchmarks/presets.py               | 2 +-
 devops/scripts/benchmarks/utils/compute_runtime.py | 2 +-
 devops/scripts/benchmarks/utils/oneapi.py          | 2 +-
 devops/scripts/benchmarks/utils/result.py          | 2 +-
 devops/scripts/benchmarks/utils/utils.py           | 2 +-
 16 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 1bc99b11518e3..3ca6e3a7b7d3b 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index c12f811942849..38633912b001a 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index 14c0104d0a08c..b846b0853ce66 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 4862bc64ecbaf..7afdd803b5cc3 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index 1f736e7755f92..e465d5e9e01c9 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 652a831d0222e..916a321b143cf 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index f05e0192d26ee..0b80c54ad7393 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index 32a00ab67bb47..dc79c2a4781b6 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -1,5 +1,5 @@
 <!--
-  Copyright (C) 2024 Intel Corporation
+  Copyright (C) 2024-2025 Intel Corporation
   Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
   See LICENSE.TXT
   SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 47d23ff8f6e9d..175ab92862e17 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 // See LICENSE.TXT
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index d05575a5a06ca..c4445ddee28db 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 40a3f914e5115..e69dfeb153b49 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
index e394a8b4b622e..3f191766deb8c 100644
--- a/devops/scripts/benchmarks/presets.py
+++ b/devops/scripts/benchmarks/presets.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
index 74d8ff4eb5345..f4864c112f640 100644
--- a/devops/scripts/benchmarks/utils/compute_runtime.py
+++ b/devops/scripts/benchmarks/utils/compute_runtime.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/utils/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
index e1876b5ed37fb..fc27b9a8b2d3e 100644
--- a/devops/scripts/benchmarks/utils/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index b29d973602a35..2d9b7e914bd8d 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
index 2d5fad6cd8917..54f2ef7fb9c1f 100644
--- a/devops/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

From ef88ea0956b6735fa40fdda7da509f5ba160e434 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 14 Mar 2025 09:41:47 -0700
Subject: [PATCH 52/79] [CI] Enforce commit hash to be a string regardless

---
 .github/workflows/benchmark.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index ff2fddb2ae88d..8e860bce6a384 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -8,6 +8,7 @@ on:
       commit_hash:
         type: string
         required: false
+        default: ''
       upload_results:
         type: string # true/false: workflow_dispatch does not support booleans
         required: true

From b7acba222bf409cc259eb6b14d2ac192fc0198d9 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Tue, 18 Mar 2025 11:30:36 +0000
Subject: [PATCH 53/79] cleanup options in js scripts and fix ordering on bar
 charts

---
 devops/scripts/benchmarks/html/scripts.js | 42 +++++++++++------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 175ab92862e17..82783a652dddc 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -12,10 +12,6 @@ let timeseriesData, barChartsData, allRunNames;
 // DOM Elements
 let runSelect, selectedRunsDiv, suiteFiltersContainer;
 
-// Add this at the top of the file with the other variable declarations
-let showNotes = true;
-let showUnstable = false;
-
 // Run selector functions
 function updateSelectedRuns(forceUpdate = true) {
     selectedRunsDiv.innerHTML = '';
@@ -215,7 +211,7 @@ function createChartContainer(data, canvasId, type) {
         const unstableWarning = document.createElement('div');
         unstableWarning.className = 'benchmark-unstable';
         unstableWarning.textContent = metadata.unstable;
-        unstableWarning.style.display = showUnstable ? 'block' : 'none';
+        unstableWarning.style.display = isUnstableEnabled() ? 'block' : 'none';
         container.appendChild(unstableWarning);
     }
 
@@ -224,7 +220,7 @@ function createChartContainer(data, canvasId, type) {
         const noteElement = document.createElement('div');
         noteElement.className = 'benchmark-note';
         noteElement.textContent = metadata.notes;
-        noteElement.style.display = showNotes ? 'block' : 'none';
+        noteElement.style.display = isNotesEnabled() ? 'block' : 'none';
         container.appendChild(noteElement);
     }
 
@@ -390,8 +386,8 @@ function updateURL() {
     }
 
     // Add toggle states to URL
-    url.searchParams.set('notes', showNotes);
-    url.searchParams.set('unstable', showUnstable);
+    url.searchParams.set('notes', isNotesEnabled());
+    url.searchParams.set('unstable', isUnstableEnabled());
 
     history.replaceState(null, '', url);
 }
@@ -409,16 +405,11 @@ function filterCharts() {
         // Hide unstable benchmarks if showUnstable is false
         const shouldShow = regex.test(label) &&
             activeSuites.includes(suite) &&
-            (showUnstable || !isUnstable);
+            (isUnstableEnabled() || !isUnstable);
 
         container.style.display = shouldShow ? '' : 'none';
     });
 
-    // Update notes visibility
-    document.querySelectorAll('.benchmark-note').forEach(note => {
-        note.style.display = showNotes ? 'block' : 'none';
-    });
-
     updateURL();
 }
 
@@ -464,7 +455,7 @@ function processTimeseriesData(benchmarkRuns) {
 function processBarChartsData(benchmarkRuns) {
     const groupedResults = {};
 
-    benchmarkRuns.forEach(run => {
+    benchmarkRuns.reverse().forEach(run => {
         run.results.forEach(result => {
             if (!result.explicit_group) return;
 
@@ -547,24 +538,31 @@ function setupSuiteFilters() {
     });
 }
 
+function isNotesEnabled() {
+    const notesToggle = document.getElementById('show-notes');
+    return notesToggle.checked;
+}
+
+function isUnstableEnabled() {
+    const unstableToggle = document.getElementById('show-unstable');
+    return unstableToggle.checked;
+}
+
 function setupToggles() {
     const notesToggle = document.getElementById('show-notes');
     const unstableToggle = document.getElementById('show-unstable');
 
     notesToggle.addEventListener('change', function() {
-        showNotes = this.checked;
         // Update all note elements visibility
         document.querySelectorAll('.benchmark-note').forEach(note => {
-            note.style.display = showNotes ? 'block' : 'none';
+            note.style.display = isNotesEnabled() ? 'block' : 'none';
         });
-        filterCharts();
     });
 
     unstableToggle.addEventListener('change', function() {
-        showUnstable = this.checked;
         // Update all unstable warning elements visibility
         document.querySelectorAll('.benchmark-unstable').forEach(warning => {
-            warning.style.display = showUnstable ? 'block' : 'none';
+            warning.style.display = isUnstableEnabled() ? 'block' : 'none';
         });
         filterCharts();
     });
@@ -574,12 +572,12 @@ function setupToggles() {
     const unstableParam = getQueryParam('unstable');
 
     if (notesParam !== null) {
-        showNotes = notesParam === 'true';
+        let showNotes = notesParam === 'true';
         notesToggle.checked = showNotes;
     }
 
     if (unstableParam !== null) {
-        showUnstable = unstableParam === 'true';
+        let showUnstable = unstableParam === 'true';
         unstableToggle.checked = showUnstable;
     }
 }

From e330a500f38f2c460c21eb72cfff8c2a28349af3 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Tue, 18 Mar 2025 11:45:53 +0000
Subject: [PATCH 54/79] use day on x axis for timeseries

---
 devops/scripts/benchmarks/html/scripts.js | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 82783a652dddc..a3ef986efdf14 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -105,7 +105,10 @@ function createChart(data, containerId, type) {
             }
         };
         options.scales.x = {
-            type: 'time',
+            type: 'timeseries',
+            time: {
+                unit: 'day'
+            },
             ticks: {
                 maxRotation: 45,
                 minRotation: 45,

From cae7049c78c697b3ac94f931716d9efb53addcd8 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Wed, 19 Mar 2025 16:28:50 -0700
Subject: [PATCH 55/79] [benchmarks] Undo merging in prior tests

---
 devops/scripts/benchmarks/benches/compute.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index f375e6352e389..c26f645635d27 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -148,18 +148,6 @@ def benchmarks(self) -> list[Benchmark]:
             ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
             ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
             VectorSum(self),
-            MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
-            MemcpyExecute(self, 400, 8, 1024, 100, 1, 1, 1),
-            MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
-            MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
         ]
 
         # Add UR-specific benchmarks

From 6bff3d695db298083e070656bd3b622060de98e5 Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 10:13:26 +0000
Subject: [PATCH 56/79] add an option to limit build parallelism

---
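The new cap is exposed both as a --build-jobs flag on main.py and as
options.build_jobs, which defaults to multiprocessing.cpu_count(). A minimal
usage sketch (all other main.py arguments elided):

    ./main.py ... --build-jobs 4
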
 devops/scripts/benchmarks/benches/compute.py       |  2 +-
 devops/scripts/benchmarks/benches/llamacpp.py      |  2 +-
 devops/scripts/benchmarks/benches/syclbench.py     |  2 +-
 devops/scripts/benchmarks/benches/velocity.py      |  2 +-
 devops/scripts/benchmarks/main.py                  |  7 +++++++
 devops/scripts/benchmarks/options.py               |  3 ++-
 devops/scripts/benchmarks/utils/compute_runtime.py | 10 +++++-----
 7 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index c26f645635d27..e0a4a6f0cb741 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -64,7 +64,7 @@ def setup(self):
 
         run(configure_command, add_sycl=True)
 
-        run(f"cmake --build {build_path} -j", add_sycl=True)
+        run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True)
 
         self.built = True
 
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 38633912b001a..e2f0ee40cb417 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -67,7 +67,7 @@ def setup(self):
         run(configure_command, add_sycl=True)
 
         run(
-            f"cmake --build {self.build_path} -j",
+            f"cmake --build {self.build_path} -j {options.build_jobs}",
             add_sycl=True,
             ld_library=self.oneapi.ld_libraries(),
         )
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index b846b0853ce66..44f3ca16e8a35 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -51,7 +51,7 @@ def setup(self):
             ]
 
         run(configure_command, add_sycl=True)
-        run(f"cmake --build {build_path} -j", add_sycl=True)
+        run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True)
 
         self.built = True
 
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 916a321b143cf..2622177f7977e 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -101,7 +101,7 @@ def setup(self):
 
         run(configure_command, {"CC": "clang", "CXX": "clang++"}, add_sycl=True)
         run(
-            f"cmake --build {build_path} -j",
+            f"cmake --build {build_path} -j {options.build_jobs}",
             add_sycl=True,
             ld_library=self.ld_libraries(),
         )
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index c4445ddee28db..14e5fe1a04624 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -481,6 +481,12 @@ def validate_and_parse_env_args(env_args):
         help="Specify a custom results directory",
         default=options.custom_results_dir,
     )
+    parser.add_argument(
+        "--build-jobs",
+        type=int,
+        help="Number of build jobs to run simultaneously",
+        default=options.build_jobs,
+    )
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -508,6 +514,7 @@ def validate_and_parse_env_args(env_args):
     options.cublas_directory = args.cublas_directory
     options.preset = args.preset
     options.custom_results_dir = args.results_dir
+    options.build_jobs = args.build_jobs
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index ced76a5d692f2..267c7f8142c2f 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
+import multiprocessing
 
 from presets import presets
 
@@ -44,6 +45,6 @@ class Options:
     current_run_name: str = "This PR"
     preset: str = "Full"
     custom_results_dir = None
-
+    build_jobs: int = multiprocessing.cpu_count()
 
 options = Options()
diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
index f4864c112f640..85271726e715c 100644
--- a/devops/scripts/benchmarks/utils/compute_runtime.py
+++ b/devops/scripts/benchmarks/utils/compute_runtime.py
@@ -62,7 +62,7 @@ def build_gmmlib(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.gmmlib_build} -j")
+        run(f"cmake --build {self.gmmlib_build} -j {options.build_jobs}")
         run(f"cmake --install {self.gmmlib_build}")
         return self.gmmlib_install
 
@@ -87,7 +87,7 @@ def build_level_zero(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.level_zero_build} -j")
+        run(f"cmake --build {self.level_zero_build} -j {options.build_jobs}")
         run(f"cmake --install {self.level_zero_build}")
         return self.level_zero_install
 
@@ -142,8 +142,8 @@ def build_igc(self, repo, commit):
         ]
         run(configure_command)
 
-        # set timeout to 30min. IGC takes A LONG time to build if building from scratch.
-        run(f"cmake --build {self.igc_build} -j", timeout=600 * 3)
+        # set timeout to 2h. IGC takes A LONG time to build if building from scratch.
+        run(f"cmake --build {self.igc_build} -j {options.build_jobs}", timeout=60 * 60 * 2)
         # cmake --install doesn't work...
         run("make install", cwd=self.igc_build)
         return self.igc_install
@@ -214,7 +214,7 @@ def build_compute_runtime(self):
             configure_command.append(f"-DIGC_DIR={self.igc}")
 
         run(configure_command)
-        run(f"cmake --build {self.compute_runtime_build} -j")
+        run(f"cmake --build {self.compute_runtime_build} -j {options.build_jobs}")
         return self.compute_runtime_build
 
 

From d2610c3cae88b7a8e87b4b461d40de828d56e59d Mon Sep 17 00:00:00 2001
From: Piotr Balcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 12:06:39 +0000
Subject: [PATCH 57/79] add support for benchmark tags

---
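Individual benchmarks opt in by overriding the new Benchmark.get_tags() hook;
valid names are the ones declared in the benchmark_tags list in base.py, and
translate_tags() silently drops anything it does not recognize. A minimal
sketch of what a benchmark subclass adds (illustrative, mirroring the changes
below):

    def get_tags(self):
        return ["sycl", "micro"]
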
 devops/scripts/benchmarks/benches/base.py     | 32 ++++++++++++--
 devops/scripts/benchmarks/benches/compute.py  | 44 ++++++++++++++++++-
 devops/scripts/benchmarks/benches/llamacpp.py |  3 ++
 .../scripts/benchmarks/benches/syclbench.py   |  3 ++
 devops/scripts/benchmarks/benches/umf.py      |  3 ++
 devops/scripts/benchmarks/benches/velocity.py |  3 ++
 devops/scripts/benchmarks/utils/result.py     |  9 +++-
 7 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 3ca6e3a7b7d3b..6c8d0aa4b77dc 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -3,15 +3,36 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+from dataclasses import dataclass
 import os
 import shutil
 from pathlib import Path
-from utils.result import BenchmarkMetadata, Result
+from utils.result import BenchmarkMetadata, BenchmarkTag, Result
 from options import options
 from utils.utils import download, run
-import urllib.request
-import tarfile
 
+benchmark_tags = [BenchmarkTag('sycl', 'Benchmark uses SYCL RT'),
+                  BenchmarkTag('ur', 'Benchmark uses Unified Runtime'),
+                  BenchmarkTag('L0', 'Benchmark uses L0 directly'),
+                  BenchmarkTag('umf', 'Benchmark uses UMF directly'),
+                  BenchmarkTag('micro', 'Microbenchmark focusing on a specific niche'),
+                  BenchmarkTag('application', 'Real application-based performance test'),
+                  BenchmarkTag('proxy', 'Benchmark that tries to implement a real application use-case'),
+                  BenchmarkTag('submit', 'Benchmark tests the kernel submit path'),
+                  BenchmarkTag('math', 'Benchmark tests math compute performance'),
+                  BenchmarkTag('memory', 'Benchmark tests memory transfer performance'),
+                  BenchmarkTag('allocation', 'Benchmark tests memory allocation performance'),
+                  BenchmarkTag('graph', 'Benchmark tests graph performance'),]
+
+def translate_tags(tag_names: list[str]) -> list[BenchmarkTag]:
+    tags = []
+    for tag_name in tag_names:
+        for tag in benchmark_tags:
+            if tag.name == tag_name:
+                tags.append(tag)
+                break
+
+    return tags
 
 class Benchmark:
     def __init__(self, directory, suite):
@@ -105,15 +126,18 @@ def notes(self) -> str:
     def unstable(self) -> str:
         return None
 
+    def get_tags(self) -> list[str]:
+        return []
+
     def get_metadata(self) -> BenchmarkMetadata:
         return BenchmarkMetadata(
             type="benchmark",
             description=self.description(),
             notes=self.notes(),
             unstable=self.unstable(),
+            tags=translate_tags(self.get_tags())
         )
 
-
 class Suite:
     def benchmarks(self) -> list[Benchmark]:
         raise NotImplementedError()
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index e0a4a6f0cb741..2882b29dfa0af 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -7,7 +7,7 @@
 import csv
 import io
 from utils.utils import run, git_clone, create_build_path
-from .base import Benchmark, Suite
+from .base import Benchmark, Suite, translate_tags
 from utils.result import BenchmarkMetadata, Result
 from options import options
 from enum import Enum
@@ -26,6 +26,13 @@ def runtime_to_name(runtime: RUNTIMES) -> str:
         RUNTIMES.UR: "Unified Runtime",
     }[runtime]
 
+def runtime_to_tag_name(runtime: RUNTIMES) -> str:
+    return {
+        RUNTIMES.SYCL: "sycl",
+        RUNTIMES.LEVEL_ZERO: "L0",
+        RUNTIMES.UR: "ur",
+    }[runtime]
+
 
 class ComputeBench(Suite):
     def __init__(self, directory):
@@ -77,10 +84,12 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
                 "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
                 "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
                 "Work is ongoing to reduce the overhead of the SYCL API\n",
+                tags=translate_tags(['submit', 'micro'])
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
+                tags=translate_tags(['submit', 'micro'])
             ),
         }
 
@@ -265,6 +274,9 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
             bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
         )
 
+    def get_tags(self):
+        return ['submit', runtime_to_tag_name(self.runtime), 'micro']
+
     def name(self):
         order = "in order" if self.ioq else "out of order"
         completion_str = " with measure completion" if self.measure_completion else ""
@@ -327,6 +339,9 @@ def description(self) -> str:
             f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads."
         )
 
+    def get_tags(self):
+        return ['memory', 'sycl', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=100000",
@@ -357,6 +372,9 @@ def description(self) -> str:
             f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
         )
 
+    def get_tags(self):
+        return ['memory', 'sycl', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -384,6 +402,9 @@ def description(self) -> str:
             f"{self.destination} with {self.size} bytes per operation."
         )
 
+    def get_tags(self):
+        return ['memory', 'sycl', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -413,6 +434,9 @@ def description(self) -> str:
     def lower_is_better(self):
         return False
 
+    def get_tags(self):
+        return ['memory', 'sycl', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -439,6 +463,9 @@ def description(self) -> str:
             "using SYCL."
         )
 
+    def get_tags(self):
+        return ['math', 'sycl', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=1000",
@@ -485,6 +512,9 @@ def description(self) -> str:
             f"from {src_type} to {dst_type} memory {events} events."
         )
 
+    def get_tags(self):
+        return ['memory', 'ur', 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--Ioq=1",
@@ -525,6 +555,9 @@ def name(self):
     def unstable(self) -> str:
         return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
 
+    def get_tags(self):
+        return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -557,6 +590,9 @@ def description(self) -> str:
     def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
 
+    def get_tags(self):
+        return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -584,6 +620,9 @@ def description(self) -> str:
     def name(self):
         return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
 
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -622,6 +661,9 @@ def description(self) -> str:
     def name(self):
         return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
 
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), 'micro']
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=1000",
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index e2f0ee40cb417..f0b5694b52dc8 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -101,6 +101,9 @@ def description(self) -> str:
             "quantized model and leverages SYCL with oneDNN for acceleration."
         )
 
+    def get_tags(self):
+        return ['sycl', 'application']
+
     def lower_is_better(self):
         return False
 
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index 44f3ca16e8a35..d9d435baa064e 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -112,6 +112,9 @@ def bin_args(self) -> list[str]:
     def extra_env_vars(self) -> dict:
         return {}
 
+    def get_tags(self):
+        return ['sycl', 'micro']
+
     def setup(self):
         self.benchmark_bin = os.path.join(
             self.directory, "sycl-bench-build", self.bench_name
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index e465d5e9e01c9..ea2ecfd175a85 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -74,6 +74,9 @@ def setup(self):
 
         self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
 
+    def get_tags(self):
+        return ['umf', 'allocation']
+
     def run(self, env_vars) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 2622177f7977e..6ff3178202481 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -118,6 +118,9 @@ def parse_output(self, stdout: str) -> float:
     def description(self) -> str:
         return ""
 
+    def get_tags(self):
+        return ['sycl', 'application']
+
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 2d9b7e914bd8d..0d450ad7b9154 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -27,7 +27,6 @@ class Result:
     name: str = ""
     lower_is_better: bool = True
     suite: str = "Unknown"
-    description: str = "No description provided."
 
 
 @dataclass_json
@@ -44,6 +43,13 @@ class BenchmarkRun:
     )
 
 
+@dataclass_json
+@dataclass
+class BenchmarkTag:
+    name: str
+    description: str = ""
+
+
 @dataclass_json
 @dataclass
 class BenchmarkMetadata:
@@ -51,3 +57,4 @@ class BenchmarkMetadata:
     description: Optional[str] = None
     notes: Optional[str] = None
     unstable: Optional[str] = None
+    tags: list[BenchmarkTag] = field(default_factory=list)

From ffc60bfd004c2577fd19bbe8255c625a13ff5994 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 12:49:00 +0100
Subject: [PATCH 58/79] support for tags in html

---
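Selected tags are persisted in the page URL as a comma-separated "tags" query
parameter, and a chart stays visible only if its benchmark carries every
selected tag. An example of a shareable filtered view (host is illustrative):

    https://example.com/benchmarks/index.html?tags=sycl,micro
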
 devops/scripts/benchmarks/benches/base.py    |  12 +-
 devops/scripts/benchmarks/benches/compute.py |  10 +-
 devops/scripts/benchmarks/html/index.html    | 275 +-------------
 devops/scripts/benchmarks/html/scripts.js    | 143 ++++++-
 devops/scripts/benchmarks/html/styles.css    | 373 +++++++++++++++++++
 devops/scripts/benchmarks/output_html.py     |  32 +-
 devops/scripts/benchmarks/utils/result.py    |  13 +-
 7 files changed, 559 insertions(+), 299 deletions(-)
 create mode 100644 devops/scripts/benchmarks/html/styles.css

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 6c8d0aa4b77dc..16ff5605b08df 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -24,15 +24,7 @@
                   BenchmarkTag('allocation', 'Benchmark tests memory allocation performance'),
                   BenchmarkTag('graph', 'Benchmark tests graph performance'),]
 
-def translate_tags(tag_names: list[str]) -> list[BenchmarkTag]:
-    tags = []
-    for tag_name in tag_names:
-        for tag in benchmark_tags:
-            if tag.name == tag_name:
-                tags.append(tag)
-                break
-
-    return tags
+benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags}
 
 class Benchmark:
     def __init__(self, directory, suite):
@@ -135,7 +127,7 @@ def get_metadata(self) -> BenchmarkMetadata:
             description=self.description(),
             notes=self.notes(),
             unstable=self.unstable(),
-            tags=translate_tags(self.get_tags())
+            tags=self.get_tags()
         )
 
 class Suite:
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 2882b29dfa0af..00db6bdd224d1 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -7,7 +7,7 @@
 import csv
 import io
 from utils.utils import run, git_clone, create_build_path
-from .base import Benchmark, Suite, translate_tags
+from .base import Benchmark, Suite
 from utils.result import BenchmarkMetadata, Result
 from options import options
 from enum import Enum
@@ -84,12 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
                 "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
                 "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
                 "Work is ongoing to reduce the overhead of the SYCL API\n",
-                tags=translate_tags(['submit', 'micro'])
+                tags=['submit', 'micro', 'sycl', 'ur', 'l0']
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
-                tags=translate_tags(['submit', 'micro'])
+                tags=['submit', 'micro', 'sycl', 'ur', 'L0']
+            ),
+            "SubmitGraph": BenchmarkMetadata(
+                type="group",
+                tags=['submit', 'micro', 'sycl', 'ur', 'L0', 'graph']
             ),
         }
 
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index dc79c2a4781b6..41fe6996ed432 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -15,266 +15,7 @@
     <script src="data.js"></script>
     <script src="config.js"></script>
     <script src="scripts.js"></script>
-    <style>
-        body {
-            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-            margin: 0;
-            padding: 16px;
-            background: #f8f9fa;
-        }
-        .container {
-            max-width: 1100px;
-            margin: 0 auto;
-        }
-        h1, h2 {
-            color: #212529;
-            text-align: center;
-            margin-bottom: 24px;
-            font-weight: 500;
-        }
-        .chart-container {
-            background: white;
-            border-radius: 8px;
-            padding: 24px;
-            margin-bottom: 24px;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-        }
-        @media (max-width: 768px) {
-            body {
-                padding: 12px;
-            }
-            .chart-container {
-                padding: 16px;
-                border-radius: 6px;
-            }
-            h1 {
-                font-size: 24px;
-                margin-bottom: 16px;
-            }
-        }
-        .filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-        }
-        .filter-container input {
-            padding: 8px;
-            font-size: 16px;
-            border: 1px solid #ccc;
-            border-radius: 4px;
-            width: 400px;
-            max-width: 100%;
-        }
-        .suite-filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-            padding: 16px;
-            background: #e9ecef;
-            border-radius: 8px;
-        }
-        .suite-checkbox {
-            margin: 0 8px;
-        }
-        details {
-            margin-bottom: 24px;
-        }
-        summary {
-            display: flex;
-            justify-content: space-between;
-            align-items: center;
-            font-size: 16px;
-            font-weight: 500;
-            cursor: pointer;
-            padding: 8px;
-            background: #e9ecef;
-            border-radius: 8px;
-            user-select: none;
-        }
-        summary:hover {
-            background: #dee2e6;
-        }
-        .extra-info {
-            padding: 8px;
-            background: #f8f9fa;
-            border-radius: 8px;
-            margin-top: 8px;
-        }
-        .run-selector {
-            text-align: center;
-            margin-bottom: 24px;
-            padding: 16px;
-            background: #e9ecef;
-            border-radius: 8px;
-        }
-        .run-selector select {
-            width: 300px;
-            padding: 8px;
-            margin-right: 8px;
-        }
-        .run-selector button {
-            padding: 8px 16px;
-            background: #0068B5;
-            color: white;
-            border: none;
-            border-radius: 4px;
-            cursor: pointer;
-        }
-        .run-selector button:hover {
-            background: #00C7FD;
-        }
-        .selected-runs {
-            margin-top: 12px;
-        }
-        .selected-run {
-            display: inline-block;
-            padding: 4px 8px;
-            margin: 4px;
-            background: #e2e6ea;
-            border-radius: 4px;
-        }
-        .selected-run button {
-            margin-left: 8px;
-            padding: 0 4px;
-            background: none;
-            border: none;
-            color: #dc3545;
-            cursor: pointer;
-        }
-        .download-button {
-            background: none;
-            border: none;
-            color: #0068B5;
-            cursor: pointer;
-            font-size: 16px;
-            padding: 4px;
-            margin-left: 8px;
-        }
-        .download-button:hover {
-            color: #00C7FD;
-        }
-        .loading-indicator {
-            text-align: center;
-            font-size: 18px;
-            color: #0068B5;
-            margin-bottom: 20px;
-        }
-        .extra-info-entry {
-            border: 1px solid #ddd;
-            padding: 10px;
-            margin-bottom: 10px;
-            background-color: #f9f9f9;
-            border-radius: 5px;
-        }
-        .extra-info-entry strong {
-            display: block;
-            margin-bottom: 5px;
-        }
-        .extra-info-entry em {
-            color: #555;
-        }
-        .display-options-container {
-            text-align: center;
-            margin-bottom: 24px;
-            padding: 16px;
-            background: #e9ecef;
-            border-radius: 8px;
-        }
-        .display-options-container label {
-            margin: 0 12px;
-            cursor: pointer;
-        }
-        .display-options-container input {
-            margin-right: 8px;
-        }
-        .benchmark-note {
-            background-color: #cfe2ff;
-            color: #084298;
-            padding: 10px;
-            margin-bottom: 10px;
-            border-radius: 5px;
-            border-left: 4px solid #084298;
-            white-space: pre-line;
-        }
-        .benchmark-unstable {
-            background-color: #f8d7da;
-            color: #842029;
-            padding: 10px;
-            margin-bottom: 10px;
-            border-radius: 5px;
-            border-left: 4px solid #842029;
-            white-space: pre-line;
-        }
-        .note-text {
-            color: #084298;
-        }
-        .unstable-warning {
-            color: #842029;
-            font-weight: bold;
-        }
-        .unstable-text {
-            color: #842029;
-        }
-        .options-container {
-            margin-bottom: 24px;
-            background: #e9ecef;
-            border-radius: 8px;
-            overflow: hidden;
-        }
-        .options-container summary {
-            padding: 12px 16px;
-            font-weight: 500;
-            cursor: pointer;
-            background: #dee2e6;
-            user-select: none;
-        }
-        .options-container summary:hover {
-            background: #ced4da;
-        }
-        .options-content {
-            padding: 16px;
-            display: flex;
-            flex-wrap: wrap;
-            gap: 24px;
-        }
-        .filter-section {
-            flex: 1;
-            min-width: 300px;
-        }
-        .filter-section h3 {
-            margin-top: 0;
-            margin-bottom: 12px;
-            font-size: 18px;
-            font-weight: 500;
-            text-align: left;
-        }
-        #suite-filters {
-            display: flex;
-            flex-wrap: wrap;
-            gap: 8px;
-        }
-        .display-options {
-            display: flex;
-            flex-direction: column;
-            gap: 8px;
-        }
-        .display-options label {
-            display: flex;
-            align-items: center;
-            cursor: pointer;
-        }
-        .display-options input {
-            margin-right: 8px;
-        }
-        .benchmark-description {
-            background-color: #f2f2f2;
-            color: #333;
-            padding: 10px;
-            margin-bottom: 10px;
-            border-radius: 5px;
-            border-left: 4px solid #6c757d;
-            white-space: pre-line;
-            font-style: italic;
-        }
-    </style>
+    <link rel="stylesheet" href="styles.css">
 </head>
 <body>
     <div class="container">
@@ -315,13 +56,23 @@ <h3>Display Options</h3>
                         </label>
                     </div>
                 </div>
+                
+                <div class="filter-section">
+                    <h3>Tags</h3>
+                    <div class="tag-filter-actions">
+                        <button onclick="toggleAllTags(false)">Clear All</button>
+                    </div>
+                    <div id="tag-filters">
+                        <!-- Tag checkboxes will be generated by JavaScript -->
+                    </div>
+                </div>
             </div>
         </details>
-        <details class="timeseries" open>
+        <details class="timeseries">
             <summary>Historical Results</summary>
             <div class="charts"></div>
         </details>
-        <details class="bar-charts" open>
+        <details class="bar-charts">
             <summary>Comparisons</summary>
             <div class="charts"></div>
         </details>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index a3ef986efdf14..547bcc77bcf31 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -8,9 +8,10 @@ let activeRuns = new Set(defaultCompareNames);
 let chartInstances = new Map();
 let suiteNames = new Set();
 let timeseriesData, barChartsData, allRunNames;
+let activeTags = new Set();
 
 // DOM Elements
-let runSelect, selectedRunsDiv, suiteFiltersContainer;
+let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer;
 
 // Run selector functions
 function updateSelectedRuns(forceUpdate = true) {
@@ -218,6 +219,14 @@ function createChartContainer(data, canvasId, type) {
         container.appendChild(unstableWarning);
     }
 
+    // Add description if present in metadata (moved outside of details)
+    if (metadata && metadata.description) {
+        const descElement = document.createElement('div');
+        descElement.className = 'benchmark-description';
+        descElement.textContent = metadata.description;
+        container.appendChild(descElement);
+    }
+
     // Add notes if present
     if (metadata && metadata.notes) {
         const noteElement = document.createElement('div');
@@ -227,12 +236,29 @@ function createChartContainer(data, canvasId, type) {
         container.appendChild(noteElement);
     }
 
-    // Add description if present in metadata, but only for groups
-    if (metadata && metadata.description && metadata.type === "group") {
-        const descElement = document.createElement('div');
-        descElement.className = 'benchmark-description';
-        descElement.textContent = metadata.description;
-        container.appendChild(descElement);
+    // Add tags if present
+    if (metadata && metadata.tags) {
+        container.setAttribute('data-tags', metadata.tags.join(','));
+        
+        // Add tags display
+        const tagsContainer = document.createElement('div');
+        tagsContainer.className = 'benchmark-tags';
+        
+        metadata.tags.forEach(tag => {
+            const tagElement = document.createElement('span');
+            tagElement.className = 'tag';
+            tagElement.textContent = tag;
+            tagElement.setAttribute('data-tag', tag);
+            
+            // Add tooltip with tag description
+            if (benchmarkTags[tag]) {
+                tagElement.setAttribute('title', benchmarkTags[tag].description);
+            }
+            
+            tagsContainer.appendChild(tagElement);
+        });
+        
+        container.appendChild(tagsContainer);
     }
 
     const canvas = document.createElement('canvas');
@@ -358,6 +384,7 @@ function updateURL() {
     const regex = document.getElementById('bench-filter').value;
     const activeSuites = getActiveSuites();
     const activeRunsList = Array.from(activeRuns);
+    const activeTagsList = Array.from(activeTags);
 
     if (regex) {
         url.searchParams.set('regex', regex);
@@ -371,6 +398,13 @@ function updateURL() {
         url.searchParams.delete('suites');
     }
 
+    // Add tags to URL
+    if (activeTagsList.length > 0) {
+        url.searchParams.set('tags', activeTagsList.join(','));
+    } else {
+        url.searchParams.delete('tags');
+    }
+
     // Handle the runs parameter
     if (activeRunsList.length > 0) {
         // Check if the active runs are the same as default runs
@@ -404,11 +438,18 @@ function filterCharts() {
         const label = container.getAttribute('data-label');
         const suite = container.getAttribute('data-suite');
         const isUnstable = container.getAttribute('data-unstable') === 'true';
+        const tags = container.getAttribute('data-tags') ? 
+                    container.getAttribute('data-tags').split(',') : [];
+
+        // Check if benchmark has all active tags (if any are selected)
+        const hasAllActiveTags = activeTags.size === 0 || 
+                               Array.from(activeTags).every(tag => tags.includes(tag));
 
         // Hide unstable benchmarks if showUnstable is false
         const shouldShow = regex.test(label) &&
             activeSuites.includes(suite) &&
-            (isUnstableEnabled() || !isUnstable);
+            (isUnstableEnabled() || !isUnstable) &&
+            hasAllActiveTags;
 
         container.style.display = shouldShow ? '' : 'none';
     });
@@ -585,6 +626,77 @@ function setupToggles() {
     }
 }
 
+function setupTagFilters() {
+    tagFiltersContainer = document.getElementById('tag-filters');
+    
+    // Get all unique tags from benchmark metadata
+    const allTags = new Set();
+    
+    for (const [key, metadata] of Object.entries(benchmarkMetadata)) {
+        if (metadata.tags) {
+            metadata.tags.forEach(tag => allTags.add(tag));
+        }
+    }
+    
+    // Sort tags alphabetically
+    const sortedTags = Array.from(allTags).sort();
+    
+    // Create tag filter elements
+    sortedTags.forEach(tag => {
+        const tagContainer = document.createElement('div');
+        tagContainer.className = 'tag-filter';
+        
+        const checkbox = document.createElement('input');
+        checkbox.type = 'checkbox';
+        checkbox.id = `tag-${tag}`;
+        checkbox.className = 'tag-checkbox';
+        checkbox.dataset.tag = tag;
+        
+        const label = document.createElement('label');
+        label.htmlFor = `tag-${tag}`;
+        label.textContent = tag;
+        
+        // Add info icon with tooltip if tag description exists
+        if (benchmarkTags[tag]) {
+            const infoIcon = document.createElement('span');
+            infoIcon.className = 'tag-info';
+            infoIcon.textContent = 'ⓘ';
+            infoIcon.title = benchmarkTags[tag].description;
+            label.appendChild(infoIcon);
+        }
+        
+        checkbox.addEventListener('change', function() {
+            if (this.checked) {
+                activeTags.add(tag);
+            } else {
+                activeTags.delete(tag);
+            }
+            filterCharts();
+        });
+        
+        tagContainer.appendChild(checkbox);
+        tagContainer.appendChild(label);
+        tagFiltersContainer.appendChild(tagContainer);
+    });
+}
+
+function toggleAllTags(select) {
+    const checkboxes = document.querySelectorAll('.tag-checkbox');
+    
+    checkboxes.forEach(checkbox => {
+        checkbox.checked = select;
+        const tag = checkbox.dataset.tag;
+        
+        if (select) {
+            activeTags.add(tag);
+        } else {
+            activeTags.delete(tag);
+        }
+    });
+    
+    filterCharts();
+}
+
 function initializeCharts() {
     // Process raw data
     timeseriesData = processTimeseriesData(benchmarkRuns);
@@ -621,11 +733,13 @@ function initializeCharts() {
     // Setup UI components
     setupRunSelector();
     setupSuiteFilters();
+    setupTagFilters();
     setupToggles();
 
     // Apply URL parameters
     const regexParam = getQueryParam('regex');
     const suitesParam = getQueryParam('suites');
+    const tagsParam = getQueryParam('tags');
 
     if (regexParam) {
         document.getElementById('bench-filter').value = regexParam;
@@ -638,6 +752,18 @@ function initializeCharts() {
         });
     }
 
+    // Apply tag filters from URL
+    if (tagsParam) {
+        const tags = tagsParam.split(',');
+        tags.forEach(tag => {
+            const checkbox = document.querySelector(`.tag-checkbox[data-tag="${tag}"]`);
+            if (checkbox) {
+                checkbox.checked = true;
+                activeTags.add(tag);
+            }
+        });
+    }
+
     // Setup event listeners
     document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
         checkbox.addEventListener('change', filterCharts);
@@ -651,6 +777,7 @@ function initializeCharts() {
 // Make functions available globally for onclick handlers
 window.addSelectedRun = addSelectedRun;
 window.removeRun = removeRun;
+window.toggleAllTags = toggleAllTags;
 
 // Load data based on configuration
 function loadData() {
diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css
new file mode 100644
index 0000000000000..9a3c5fe69b287
--- /dev/null
+++ b/devops/scripts/benchmarks/html/styles.css
@@ -0,0 +1,373 @@
+body {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+    margin: 0;
+    padding: 16px;
+    background: #f8f9fa;
+}
+.container {
+    max-width: 1100px;
+    margin: 0 auto;
+}
+h1, h2 {
+    color: #212529;
+    text-align: center;
+    margin-bottom: 24px;
+    font-weight: 500;
+}
+.chart-container {
+    background: white;
+    border-radius: 8px;
+    padding: 24px;
+    margin-bottom: 24px;
+    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+}
+@media (max-width: 768px) {
+    body {
+        padding: 12px;
+    }
+    .chart-container {
+        padding: 16px;
+        border-radius: 6px;
+    }
+    h1 {
+        font-size: 24px;
+        margin-bottom: 16px;
+    }
+}
+.filter-container {
+    text-align: center;
+    margin-bottom: 24px;
+}
+.filter-container input {
+    padding: 8px;
+    font-size: 16px;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+    width: 400px;
+    max-width: 100%;
+}
+.suite-filter-container {
+    text-align: center;
+    margin-bottom: 24px;
+    padding: 16px;
+    background: #e9ecef;
+    border-radius: 8px;
+}
+.suite-checkbox {
+    margin: 0 8px;
+}
+details {
+    margin-bottom: 24px;
+}
+summary {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    font-size: 16px;
+    font-weight: 500;
+    cursor: pointer;
+    padding: 12px 16px;
+    background: #dee2e6;
+    border-radius: 8px;
+    user-select: none;
+}
+summary:hover {
+    background: #ced4da;
+}
+summary::marker {
+    display: none;
+}
+summary::-webkit-details-marker {
+    display: none;
+}
+summary::after {
+    content: "▼";
+    font-size: 12px;
+    margin-left: 8px;
+    transition: transform 0.3s;
+}
+details[open] summary::after {
+    transform: rotate(180deg);
+}
+.extra-info {
+    padding: 8px;
+    background: #f8f9fa;
+    border-radius: 8px;
+    margin-top: 8px;
+}
+.run-selector {
+    text-align: center;
+    margin-bottom: 24px;
+    padding: 16px;
+    background: #e9ecef;
+    border-radius: 8px;
+}
+.run-selector select {
+    width: 300px;
+    padding: 8px;
+    margin-right: 8px;
+}
+.run-selector button {
+    padding: 8px 16px;
+    background: #0068B5;
+    color: white;
+    border: none;
+    border-radius: 4px;
+    cursor: pointer;
+}
+.run-selector button:hover {
+    background: #00C7FD;
+}
+.selected-runs {
+    margin-top: 12px;
+}
+.selected-run {
+    display: inline-block;
+    padding: 4px 8px;
+    margin: 4px;
+    background: #e2e6ea;
+    border-radius: 4px;
+}
+.selected-run button {
+    margin-left: 8px;
+    padding: 0 4px;
+    background: none;
+    border: none;
+    color: #dc3545;
+    cursor: pointer;
+}
+.download-button {
+    background: none;
+    border: none;
+    color: #0068B5;
+    cursor: pointer;
+    font-size: 16px;
+    padding: 4px;
+    margin-left: 8px;
+}
+.download-button:hover {
+    color: #00C7FD;
+}
+.loading-indicator {
+    text-align: center;
+    font-size: 18px;
+    color: #0068B5;
+    margin-bottom: 20px;
+}
+.extra-info-entry {
+    border: 1px solid #ddd;
+    padding: 10px;
+    margin-bottom: 10px;
+    background-color: #f9f9f9;
+    border-radius: 5px;
+}
+.extra-info-entry strong {
+    display: block;
+    margin-bottom: 5px;
+}
+.extra-info-entry em {
+    color: #555;
+}
+.display-options-container {
+    text-align: center;
+    margin-bottom: 24px;
+    padding: 16px;
+    background: #e9ecef;
+    border-radius: 8px;
+}
+.display-options-container label {
+    margin: 0 12px;
+    cursor: pointer;
+}
+.display-options-container input {
+    margin-right: 8px;
+}
+.benchmark-note {
+    background-color: #cfe2ff;
+    color: #084298;
+    padding: 10px;
+    margin-bottom: 10px;
+    border-radius: 5px;
+    border-left: 4px solid #084298;
+    white-space: pre-line;
+}
+.benchmark-unstable {
+    background-color: #f8d7da;
+    color: #842029;
+    padding: 10px;
+    margin-bottom: 10px;
+    border-radius: 5px;
+    border-left: 4px solid #842029;
+    white-space: pre-line;
+}
+.note-text {
+    color: #084298;
+}
+.unstable-warning {
+    color: #842029;
+    font-weight: bold;
+}
+.unstable-text {
+    color: #842029;
+}
+.options-container {
+    margin-bottom: 24px;
+    background: #e9ecef;
+    border-radius: 8px;
+    overflow: hidden;
+}
+.options-container summary {
+    padding: 12px 16px;
+    font-weight: 500;
+    cursor: pointer;
+    background: #dee2e6;
+    user-select: none;
+}
+.options-container summary:hover {
+    background: #ced4da;
+}
+.options-content {
+    padding: 16px;
+    display: flex;
+    flex-wrap: wrap;
+    gap: 24px;
+}
+.filter-section {
+    flex: 1;
+    min-width: 300px;
+}
+.filter-section h3 {
+    margin-top: 0;
+    margin-bottom: 12px;
+    font-size: 18px;
+    font-weight: 500;
+    text-align: left;
+}
+#suite-filters {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 8px;
+}
+.display-options {
+    display: flex;
+    flex-direction: column;
+    gap: 8px;
+}
+.display-options label {
+    display: flex;
+    align-items: center;
+    cursor: pointer;
+}
+.display-options input {
+    margin-right: 8px;
+}
+.benchmark-description {
+    background-color: #f2f2f2;
+    color: #333;
+    padding: 10px;
+    margin-bottom: 10px;
+    border-radius: 5px;
+    border-left: 4px solid #6c757d;
+    white-space: pre-line;
+    font-style: italic;
+}
+/* Tag styles */
+.benchmark-tags {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 4px;
+    margin-bottom: 10px;
+}
+
+.tag {
+    display: inline-block;
+    background-color: #e2e6ea;
+    color: #495057;
+    padding: 2px 8px;
+    border-radius: 12px;
+    font-size: 12px;
+    cursor: default;
+}
+
+.tag-filter {
+    display: inline-flex;
+    align-items: center;
+    margin: 4px;
+}
+
+.tag-filter label {
+    margin-left: 4px;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+}
+
+.tag-info {
+    color: #0068B5;
+    margin-left: 4px;
+    cursor: help;
+    font-size: 12px;
+}
+
+#tag-filters {
+    display: flex;
+    flex-wrap: wrap;
+    max-height: 200px;
+    overflow-y: auto;
+    border: 1px solid #dee2e6;
+    border-radius: 4px;
+    padding: 8px;
+    background-color: #f8f9fa;
+}
+
+.tag-filter-actions {
+    margin-bottom: 8px;
+    display: flex;
+    gap: 8px;
+}
+
+.tag-filter-actions button {
+    padding: 4px 8px;
+    background: #e2e6ea;
+    border: none;
+    border-radius: 4px;
+    cursor: pointer;
+}
+
+.tag-filter-actions button:hover {
+    background: #ced4da;
+}
+
+#active-tags {
+    display: none;
+    flex-wrap: wrap;
+    gap: 8px;
+    margin-top: 12px;
+    padding: 8px;
+    background-color: #f8f9fa;
+    border-radius: 4px;
+}
+
+.active-tag {
+    display: flex;
+    align-items: center;
+    background-color: #0068B5;
+    color: white;
+    padding: 4px 8px;
+    border-radius: 12px;
+    font-size: 14px;
+}
+
+.remove-tag {
+    background: none;
+    border: none;
+    color: white;
+    margin-left: 4px;
+    cursor: pointer;
+    font-size: 16px;
+    padding: 0 4px;
+}
+
+.remove-tag:hover {
+    color: #f8d7da;
+}
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index e69dfeb153b49..429b24eb632c8 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -6,7 +6,8 @@
 import json
 import os
 from options import options
-from utils.result import BenchmarkMetadata
+from utils.result import BenchmarkMetadata, BenchmarkOutput
+from benches.base import benchmark_tags, benchmark_tags_dict
 
 
 def generate_html(
@@ -16,30 +17,33 @@ def generate_html(
     metadata: dict[str, BenchmarkMetadata],
 ):
     benchmark_runs.sort(key=lambda run: run.date, reverse=True)
-    serializable_metadata = {k: v.__dict__ for k, v in metadata.items()}
-
-    serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs]
-
-    data = {
-        "runs": serializable_runs,
-        "metadata": serializable_metadata,
-        "defaultCompareNames": compare_names,
-    }
+    
+    # Create the comprehensive output object
+    output = BenchmarkOutput(
+        runs=benchmark_runs,
+        metadata=metadata,
+        tags=benchmark_tags_dict,
+        default_compare_names=compare_names
+    )
 
     if options.output_html == "local":
         data_path = os.path.join(html_path, "data.js")
         with open(data_path, "w") as f:
             # For local format, we need to write JavaScript variable assignments
             f.write("benchmarkRuns = ")
-            json.dump(data["runs"], f, indent=2)
+            json.dump(json.loads(output.to_json())["runs"], f, indent=2)
             f.write(";\n\n")
 
             f.write("benchmarkMetadata = ")
-            json.dump(data["metadata"], f, indent=2)
+            json.dump(json.loads(output.to_json())["metadata"], f, indent=2)
+            f.write(";\n\n")
+            
+            f.write("benchmarkTags = ")
+            json.dump(json.loads(output.to_json())["tags"], f, indent=2)
             f.write(";\n\n")
 
             f.write("defaultCompareNames = ")
-            json.dump(data["defaultCompareNames"], f, indent=2)
+            json.dump(output.default_compare_names, f, indent=2)
             f.write(";\n")
 
         print(f"See {os.getcwd()}/html/index.html for the results.")
@@ -47,7 +51,7 @@ def generate_html(
         # For remote format, we write a single JSON file
         data_path = os.path.join(html_path, "data.json")
         with open(data_path, "w") as f:
-            json.dump(data, f, indent=2)
+            json.dump(json.loads(output.to_json()), f, indent=2)
 
         print(
             f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 0d450ad7b9154..82fc7ca1fddc2 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import Optional, Dict, List, Any
 from dataclasses_json import config, dataclass_json
 from datetime import datetime
 
@@ -57,4 +57,13 @@ class BenchmarkMetadata:
     description: Optional[str] = None
     notes: Optional[str] = None
     unstable: Optional[str] = None
-    tags: list[BenchmarkTag] = field(default_factory=list)
+    tags: list[str] = field(default_factory=list)  # Changed to list of tag names
+
+
+@dataclass_json
+@dataclass
+class BenchmarkOutput:
+    runs: list[BenchmarkRun]
+    metadata: Dict[str, BenchmarkMetadata]
+    tags: Dict[str, BenchmarkTag]
+    default_compare_names: List[str] = field(default_factory=list)
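
For reference, a minimal sketch of the serialization pattern the hunk above relies
on: a dataclasses-json dataclass that rolls runs, metadata, tags and default compare
names into one JSON document. The Tag/Output class names and all sample values below
are invented for illustration; the real classes are BenchmarkTag/BenchmarkOutput in
utils/result.py.

    # Illustrative sketch only -- not code from the patch.
    from dataclasses import dataclass, field
    from typing import Dict, List

    from dataclasses_json import dataclass_json


    @dataclass_json
    @dataclass
    class Tag:
        name: str
        description: str


    @dataclass_json
    @dataclass
    class Output:
        metadata: Dict[str, List[str]]
        tags: Dict[str, Tag]
        default_compare_names: List[str] = field(default_factory=list)


    out = Output(
        metadata={"SubmitKernel": ["submit", "micro"]},
        tags={"submit": Tag("submit", "Tests kernel submission performance")},
        default_compare_names=["baseline"],
    )
    # One document containing every section the dashboard needs.
    print(out.to_json(indent=2))

Note that the local-output branch above re-parses output.to_json() once per section;
serializing once and indexing the resulting dict would be an equivalent, purely
stylistic alternative.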

From 3662b430fa20585aebeec6a256433160b7e8764d Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 13:01:40 +0100
Subject: [PATCH 59/79] tiny tweaks for benchmark tags

---
 devops/scripts/benchmarks/benches/base.py    |  2 +-
 devops/scripts/benchmarks/benches/compute.py |  4 ++--
 devops/scripts/benchmarks/html/scripts.js    | 20 +++++++++-----------
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 16ff5605b08df..209dc993ae53c 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -110,7 +110,7 @@ def name(self):
         raise NotImplementedError()
 
     def description(self):
-        return "No description provided."
+        return ""
 
     def notes(self) -> str:
         return None
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 00db6bdd224d1..bff535dd2c335 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -84,12 +84,12 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
                 "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
                 "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
                 "Work is ongoing to reduce the overhead of the SYCL API\n",
-                tags=['submit', 'micro', 'sycl', 'ur', 'l0']
+                tags=['submit', 'micro', 'sycl', 'ur', 'L0']
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
-                tags=['submit', 'micro', 'sycl', 'ur', 'L0']
+                tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph']
             ),
             "SubmitGraph": BenchmarkMetadata(
                 type="group",
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 547bcc77bcf31..fbfb496533194 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -628,21 +628,19 @@ function setupToggles() {
 
 function setupTagFilters() {
     tagFiltersContainer = document.getElementById('tag-filters');
+
+    const allTags = [];
     
-    // Get all unique tags from benchmark metadata
-    const allTags = new Set();
-    
-    for (const [key, metadata] of Object.entries(benchmarkMetadata)) {
-        if (metadata.tags) {
-            metadata.tags.forEach(tag => allTags.add(tag));
+    if (benchmarkTags) {
+        for (const tag in benchmarkTags) {
+            if (!allTags.includes(tag)) {
+                allTags.push(tag);
+            }
         }
     }
-    
-    // Sort tags alphabetically
-    const sortedTags = Array.from(allTags).sort();
-    
+
     // Create tag filter elements
-    sortedTags.forEach(tag => {
+    allTags.forEach(tag => {
         const tagContainer = document.createElement('div');
         tagContainer.className = 'tag-filter';
         

From 75dd2294adb0682dcab400ce66897ee2d404bbc6 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 13:23:26 +0100
Subject: [PATCH 60/79] better and more tags

---
 devops/scripts/benchmarks/benches/base.py     | 31 +++++++++++-------
 devops/scripts/benchmarks/benches/compute.py  | 32 +++++++++----------
 devops/scripts/benchmarks/benches/llamacpp.py |  2 +-
 .../scripts/benchmarks/benches/syclbench.py   | 11 ++++++-
 devops/scripts/benchmarks/benches/umf.py      |  2 +-
 devops/scripts/benchmarks/benches/velocity.py | 29 ++++++++++++++++-
 6 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 209dc993ae53c..901235f6e1455 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -11,18 +11,25 @@
 from options import options
 from utils.utils import download, run
 
-benchmark_tags = [BenchmarkTag('sycl', 'Benchmark uses SYCL RT'),
-                  BenchmarkTag('ur', 'Benchmark uses Unified Runtime'),
-                  BenchmarkTag('L0', 'Benchmark uses L0 directly'),
-                  BenchmarkTag('umf', 'Benchmark uses UMF directly'),
-                  BenchmarkTag('micro', 'Microbenchmark focusing on a specific niche'),
-                  BenchmarkTag('application', 'Real application-based performance test'),
-                  BenchmarkTag('proxy', 'Benchmark that tries to implement a real application use-case'),
-                  BenchmarkTag('submit', 'Benchmark tests the kernel submit path'),
-                  BenchmarkTag('math', 'Benchmark tests math compute performance'),
-                  BenchmarkTag('memory', 'Benchmark tests memory transfer performance'),
-                  BenchmarkTag('allocation', 'Benchmark tests memory allocation performance'),
-                  BenchmarkTag('graph', 'Benchmark tests graph performance'),]
+benchmark_tags = [
+    BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'),
+    BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'),
+    BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'),
+    BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'),
+    BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'),
+    BenchmarkTag('application', 'Real application-based performance test'),
+    BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'),
+    BenchmarkTag('submit', 'Tests kernel submission performance'),
+    BenchmarkTag('math', 'Tests math computation performance'),
+    BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'),
+    BenchmarkTag('allocation', 'Tests memory allocation performance'),
+    BenchmarkTag('graph', 'Tests graph-based execution performance'),
+    BenchmarkTag('latency', 'Measures operation latency'),
+    BenchmarkTag('throughput', 'Measures operation throughput'),
+    BenchmarkTag('inference', 'Tests ML/AI inference performance'),
+    BenchmarkTag('image', 'Image processing benchmark'),
+    BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'),
+]
 
 benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags}
 
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index bff535dd2c335..e1f24e0178789 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -28,9 +28,9 @@ def runtime_to_name(runtime: RUNTIMES) -> str:
 
 def runtime_to_tag_name(runtime: RUNTIMES) -> str:
     return {
-        RUNTIMES.SYCL: "sycl",
+        RUNTIMES.SYCL: "SYCL",
         RUNTIMES.LEVEL_ZERO: "L0",
-        RUNTIMES.UR: "ur",
+        RUNTIMES.UR: "UR",
     }[runtime]
 
 
@@ -84,16 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
                 "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
                 "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
                 "Work is ongoing to reduce the overhead of the SYCL API\n",
-                tags=['submit', 'micro', 'sycl', 'ur', 'L0']
+                tags=['submit', 'micro', 'SYCL', 'UR', 'L0']
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
-                tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph']
+                    tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph']
             ),
             "SubmitGraph": BenchmarkMetadata(
                 type="group",
-                tags=['submit', 'micro', 'sycl', 'ur', 'L0', 'graph']
+                tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph']
             ),
         }
 
@@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
         )
 
     def get_tags(self):
-        return ['submit', runtime_to_tag_name(self.runtime), 'micro']
+        return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro']
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
@@ -344,7 +344,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'sycl', 'micro']
+        return ['memory', 'submit', 'latency', 'SYCL', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -377,7 +377,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'sycl', 'micro']
+        return ['memory', 'latency', 'SYCL', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -407,7 +407,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'sycl', 'micro']
+        return ['memory', 'latency', 'SYCL', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -439,7 +439,7 @@ def lower_is_better(self):
         return False
 
     def get_tags(self):
-        return ['memory', 'sycl', 'micro']
+        return ['memory', 'throughput', 'SYCL', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -468,7 +468,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['math', 'sycl', 'micro']
+        return ['math', 'throughput', 'SYCL', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -517,7 +517,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'ur', 'micro']
+        return ['memory', 'latency', 'UR', 'micro']
 
     def bin_args(self) -> list[str]:
         return [
@@ -560,7 +560,7 @@ def unstable(self) -> str:
         return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
 
     def get_tags(self):
-        return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory']
+        return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency']
 
     def bin_args(self) -> list[str]:
         return [
@@ -595,7 +595,7 @@ def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
 
     def get_tags(self):
-        return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit']
+        return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency']
 
     def bin_args(self) -> list[str]:
         return [
@@ -625,7 +625,7 @@ def name(self):
         return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
 
     def get_tags(self):
-        return [runtime_to_tag_name(self.runtime), 'micro']
+        return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit']
 
     def bin_args(self) -> list[str]:
         return [
@@ -666,7 +666,7 @@ def name(self):
         return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
 
     def get_tags(self):
-        return [runtime_to_tag_name(self.runtime), 'micro']
+        return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit']
 
     def bin_args(self) -> list[str]:
         return [
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index f0b5694b52dc8..cf203bca17f4f 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -102,7 +102,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['sycl', 'application']
+        return ['SYCL', 'application', 'inference', 'throughput']
 
     def lower_is_better(self):
         return False
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index d9d435baa064e..50f35182eaddc 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -113,7 +113,16 @@ def extra_env_vars(self) -> dict:
         return {}
 
     def get_tags(self):
-        return ['sycl', 'micro']
+        base_tags = ['SYCL', 'micro']
+        if "Memory" in self.bench_name or "mem" in self.bench_name.lower():
+            base_tags.append('memory')
+        if "Reduction" in self.bench_name:
+            base_tags.append('math')
+        if "Bandwidth" in self.bench_name:
+            base_tags.append('throughput')
+        if "Latency" in self.bench_name:
+            base_tags.append('latency')
+        return base_tags
 
     def setup(self):
         self.benchmark_bin = os.path.join(
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index ea2ecfd175a85..60964fcf93298 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -75,7 +75,7 @@ def setup(self):
         self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
 
     def get_tags(self):
-        return ['umf', 'allocation']
+        return ['UMF', 'allocation', 'latency', 'micro']
 
     def run(self, env_vars) -> list[Result]:
         command = [
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 6ff3178202481..623079067b91d 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -119,7 +119,7 @@ def description(self) -> str:
         return ""
 
     def get_tags(self):
-        return ['sycl', 'application']
+        return ['SYCL', 'application']
 
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
@@ -175,6 +175,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse keys per second from benchmark output."
             )
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'throughput']
+
 
 class Bitcracker(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -213,6 +216,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'throughput']
+
 
 class SobelFilter(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -259,6 +265,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'image', 'throughput']
+
 
 class QuickSilver(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -306,6 +315,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'simulation', 'throughput']
+
 
 class Easywave(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -370,6 +382,9 @@ def parse_output(self, stdout: str) -> float:
             os.path.join(options.benchmark_cwd, "easywave.log")
         )
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'simulation']
+
 
 class CudaSift(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -398,6 +413,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'image']
+
 
 class DLCifar(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -449,6 +467,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'inference', 'image']
+
 
 class DLMnist(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -534,6 +555,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ['SYCL', 'application', 'inference', 'image']
+
 
 class SVM(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -576,3 +600,6 @@ def parse_output(self, stdout: str) -> float:
             return float(match.group(1))
         else:
             raise ValueError("Failed to parse benchmark output.")
+
+    def get_tags(self):
+        return ['SYCL', 'application', 'inference']
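
The expanded tag vocabulary above is only useful while spellings stay consistent
(earlier in the series 'l0' had to be hand-corrected to 'L0'). A small, hypothetical
helper, not part of the patch, shows how the benchmark_tags_dict registry defined in
benches/base.py could guard get_tags() return values against such typos:

    # Hypothetical helper -- assumes the benchmark_tags_dict registry above.
    def validate_tags(tags: list[str], registry: dict) -> list[str]:
        unknown = [t for t in tags if t not in registry]
        if unknown:
            raise ValueError(f"unknown benchmark tags: {unknown}")
        return tags

    # e.g. validate_tags(["SYCL", "application", "inference"], benchmark_tags_dict)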

From cec8f05d40a00981e04c97ecb0abb47b4d2fa4de Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 14:31:27 +0100
Subject: [PATCH 61/79] formatting

---
 devops/scripts/benchmarks/benches/base.py     | 38 +++++++--------
 devops/scripts/benchmarks/benches/compute.py  | 43 +++++++++++------
 devops/scripts/benchmarks/benches/llamacpp.py |  2 +-
 .../scripts/benchmarks/benches/syclbench.py   | 10 ++--
 devops/scripts/benchmarks/benches/umf.py      |  2 +-
 devops/scripts/benchmarks/benches/velocity.py | 20 ++++----
 devops/scripts/benchmarks/html/index.html     | 19 ++++----
 devops/scripts/benchmarks/html/styles.css     | 46 ++++++-------------
 devops/scripts/benchmarks/options.py          |  1 +
 devops/scripts/benchmarks/output_html.py      |  6 +--
 .../benchmarks/utils/compute_runtime.py       |  5 +-
 11 files changed, 96 insertions(+), 96 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index 901235f6e1455..4c2973d250e3d 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -12,27 +12,28 @@
 from utils.utils import download, run
 
 benchmark_tags = [
-    BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'),
-    BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'),
-    BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'),
-    BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'),
-    BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'),
-    BenchmarkTag('application', 'Real application-based performance test'),
-    BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'),
-    BenchmarkTag('submit', 'Tests kernel submission performance'),
-    BenchmarkTag('math', 'Tests math computation performance'),
-    BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'),
-    BenchmarkTag('allocation', 'Tests memory allocation performance'),
-    BenchmarkTag('graph', 'Tests graph-based execution performance'),
-    BenchmarkTag('latency', 'Measures operation latency'),
-    BenchmarkTag('throughput', 'Measures operation throughput'),
-    BenchmarkTag('inference', 'Tests ML/AI inference performance'),
-    BenchmarkTag('image', 'Image processing benchmark'),
-    BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'),
+    BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
+    BenchmarkTag("UR", "Benchmark uses Unified Runtime API"),
+    BenchmarkTag("L0", "Benchmark uses Level Zero API directly"),
+    BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"),
+    BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"),
+    BenchmarkTag("application", "Real application-based performance test"),
+    BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"),
+    BenchmarkTag("submit", "Tests kernel submission performance"),
+    BenchmarkTag("math", "Tests math computation performance"),
+    BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"),
+    BenchmarkTag("allocation", "Tests memory allocation performance"),
+    BenchmarkTag("graph", "Tests graph-based execution performance"),
+    BenchmarkTag("latency", "Measures operation latency"),
+    BenchmarkTag("throughput", "Measures operation throughput"),
+    BenchmarkTag("inference", "Tests ML/AI inference performance"),
+    BenchmarkTag("image", "Image processing benchmark"),
+    BenchmarkTag("simulation", "Physics or scientific simulation benchmark"),
 ]
 
 benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags}
 
+
 class Benchmark:
     def __init__(self, directory, suite):
         self.directory = directory
@@ -134,9 +135,10 @@ def get_metadata(self) -> BenchmarkMetadata:
             description=self.description(),
             notes=self.notes(),
             unstable=self.unstable(),
-            tags=self.get_tags()
+            tags=self.get_tags(),
         )
 
+
 class Suite:
     def benchmarks(self) -> list[Benchmark]:
         raise NotImplementedError()
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index e1f24e0178789..cd4ab7cd9b26c 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -26,6 +26,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str:
         RUNTIMES.UR: "Unified Runtime",
     }[runtime]
 
+
 def runtime_to_tag_name(runtime: RUNTIMES) -> str:
     return {
         RUNTIMES.SYCL: "SYCL",
@@ -84,16 +85,15 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
                 "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
                 "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
                 "Work is ongoing to reduce the overhead of the SYCL API\n",
-                tags=['submit', 'micro', 'SYCL', 'UR', 'L0']
+                tags=["submit", "micro", "SYCL", "UR", "L0"],
             ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
-                    tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph']
+                tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
             ),
             "SubmitGraph": BenchmarkMetadata(
-                type="group",
-                tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph']
+                type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
             ),
         }
 
@@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
         )
 
     def get_tags(self):
-        return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro']
+        return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"]
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
@@ -344,7 +344,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'submit', 'latency', 'SYCL', 'micro']
+        return ["memory", "submit", "latency", "SYCL", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -377,7 +377,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'latency', 'SYCL', 'micro']
+        return ["memory", "latency", "SYCL", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -407,7 +407,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'latency', 'SYCL', 'micro']
+        return ["memory", "latency", "SYCL", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -439,7 +439,7 @@ def lower_is_better(self):
         return False
 
     def get_tags(self):
-        return ['memory', 'throughput', 'SYCL', 'micro']
+        return ["memory", "throughput", "SYCL", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -468,7 +468,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['math', 'throughput', 'SYCL', 'micro']
+        return ["math", "throughput", "SYCL", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -517,7 +517,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['memory', 'latency', 'UR', 'micro']
+        return ["memory", "latency", "UR", "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -560,7 +560,14 @@ def unstable(self) -> str:
         return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
 
     def get_tags(self):
-        return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency']
+        return [
+            "graph",
+            runtime_to_tag_name(self.runtime),
+            "proxy",
+            "submit",
+            "memory",
+            "latency",
+        ]
 
     def bin_args(self) -> list[str]:
         return [
@@ -595,7 +602,13 @@ def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
 
     def get_tags(self):
-        return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency']
+        return [
+            "graph",
+            runtime_to_tag_name(self.runtime),
+            "micro",
+            "submit",
+            "latency",
+        ]
 
     def bin_args(self) -> list[str]:
         return [
@@ -625,7 +638,7 @@ def name(self):
         return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
 
     def get_tags(self):
-        return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit']
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -666,7 +679,7 @@ def name(self):
         return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
 
     def get_tags(self):
-        return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit']
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
 
     def bin_args(self) -> list[str]:
         return [
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index cf203bca17f4f..19af2498a0a63 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -102,7 +102,7 @@ def description(self) -> str:
         )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'inference', 'throughput']
+        return ["SYCL", "application", "inference", "throughput"]
 
     def lower_is_better(self):
         return False
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index 50f35182eaddc..f1e366aa5bc4b 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -113,15 +113,15 @@ def extra_env_vars(self) -> dict:
         return {}
 
     def get_tags(self):
-        base_tags = ['SYCL', 'micro']
+        base_tags = ["SYCL", "micro"]
         if "Memory" in self.bench_name or "mem" in self.bench_name.lower():
-            base_tags.append('memory')
+            base_tags.append("memory")
         if "Reduction" in self.bench_name:
-            base_tags.append('math')
+            base_tags.append("math")
         if "Bandwidth" in self.bench_name:
-            base_tags.append('throughput')
+            base_tags.append("throughput")
         if "Latency" in self.bench_name:
-            base_tags.append('latency')
+            base_tags.append("latency")
         return base_tags
 
     def setup(self):
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index 60964fcf93298..f0b92777dd2f8 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -75,7 +75,7 @@ def setup(self):
         self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
 
     def get_tags(self):
-        return ['UMF', 'allocation', 'latency', 'micro']
+        return ["UMF", "allocation", "latency", "micro"]
 
     def run(self, env_vars) -> list[Result]:
         command = [
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 623079067b91d..0e1f20999c731 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -119,7 +119,7 @@ def description(self) -> str:
         return ""
 
     def get_tags(self):
-        return ['SYCL', 'application']
+        return ["SYCL", "application"]
 
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
@@ -176,7 +176,7 @@ def parse_output(self, stdout: str) -> float:
             )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'throughput']
+        return ["SYCL", "application", "throughput"]
 
 
 class Bitcracker(VelocityBase):
@@ -217,7 +217,7 @@ def parse_output(self, stdout: str) -> float:
             )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'throughput']
+        return ["SYCL", "application", "throughput"]
 
 
 class SobelFilter(VelocityBase):
@@ -266,7 +266,7 @@ def parse_output(self, stdout: str) -> float:
             )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'image', 'throughput']
+        return ["SYCL", "application", "image", "throughput"]
 
 
 class QuickSilver(VelocityBase):
@@ -316,7 +316,7 @@ def parse_output(self, stdout: str) -> float:
             )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'simulation', 'throughput']
+        return ["SYCL", "application", "simulation", "throughput"]
 
 
 class Easywave(VelocityBase):
@@ -383,7 +383,7 @@ def parse_output(self, stdout: str) -> float:
         )
 
     def get_tags(self):
-        return ['SYCL', 'application', 'simulation']
+        return ["SYCL", "application", "simulation"]
 
 
 class CudaSift(VelocityBase):
@@ -414,7 +414,7 @@ def parse_output(self, stdout: str) -> float:
             raise ValueError("Failed to parse benchmark output.")
 
     def get_tags(self):
-        return ['SYCL', 'application', 'image']
+        return ["SYCL", "application", "image"]
 
 
 class DLCifar(VelocityBase):
@@ -468,7 +468,7 @@ def parse_output(self, stdout: str) -> float:
             raise ValueError("Failed to parse benchmark output.")
 
     def get_tags(self):
-        return ['SYCL', 'application', 'inference', 'image']
+        return ["SYCL", "application", "inference", "image"]
 
 
 class DLMnist(VelocityBase):
@@ -556,7 +556,7 @@ def parse_output(self, stdout: str) -> float:
             raise ValueError("Failed to parse benchmark output.")
 
     def get_tags(self):
-        return ['SYCL', 'application', 'inference', 'image']
+        return ["SYCL", "application", "inference", "image"]
 
 
 class SVM(VelocityBase):
@@ -602,4 +602,4 @@ def parse_output(self, stdout: str) -> float:
             raise ValueError("Failed to parse benchmark output.")
 
     def get_tags(self):
-        return ['SYCL', 'application', 'inference']
+        return ["SYCL", "application", "inference"]
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index 41fe6996ed432..ba8e77c6aff9e 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -36,13 +36,6 @@ <h1>Benchmark Results</h1>
         <details class="options-container">
             <summary>Options</summary>
             <div class="options-content">
-                <div class="filter-section">
-                    <h3>Suites</h3>
-                    <div id="suite-filters">
-                        <!-- Suite checkboxes will be generated by JavaScript -->
-                    </div>
-                </div>
-
                 <div class="filter-section">
                     <h3>Display Options</h3>
                     <div class="display-options">
@@ -56,12 +49,16 @@ <h3>Display Options</h3>
                         </label>
                     </div>
                 </div>
-                
+
                 <div class="filter-section">
-                    <h3>Tags</h3>
-                    <div class="tag-filter-actions">
-                        <button onclick="toggleAllTags(false)">Clear All</button>
+                    <h3>Suites</h3>
+                    <div id="suite-filters">
+                        <!-- Suite checkboxes will be generated by JavaScript -->
                     </div>
+                </div>
+
+                <div class="filter-section">
+                    <h3>Tags <button class="tag-action-button" onclick="toggleAllTags(false)">Clear All</button></h3>
                     <div id="tag-filters">
                         <!-- Tag checkboxes will be generated by JavaScript -->
                     </div>
diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css
index 9a3c5fe69b287..3e9c3bd22fc37 100644
--- a/devops/scripts/benchmarks/html/styles.css
+++ b/devops/scripts/benchmarks/html/styles.css
@@ -242,11 +242,18 @@ details[open] summary::after {
     font-size: 18px;
     font-weight: 500;
     text-align: left;
+    display: flex;
+    align-items: center;
 }
 #suite-filters {
     display: flex;
     flex-wrap: wrap;
-    gap: 8px;
+    max-height: 200px;
+    overflow-y: auto;
+    border: 1px solid #dee2e6;
+    border-radius: 4px;
+    padding: 8px;
+    background-color: #f8f9fa;
 }
 .display-options {
     display: flex;
@@ -286,7 +293,7 @@ details[open] summary::after {
     padding: 2px 8px;
     border-radius: 12px;
     font-size: 12px;
-    cursor: default;
+    cursor: help;
 }
 
 .tag-filter {
@@ -320,44 +327,21 @@ details[open] summary::after {
     background-color: #f8f9fa;
 }
 
-.tag-filter-actions {
-    margin-bottom: 8px;
-    display: flex;
-    gap: 8px;
-}
-
-.tag-filter-actions button {
-    padding: 4px 8px;
+.tag-action-button {
+    padding: 2px 8px;
     background: #e2e6ea;
     border: none;
     border-radius: 4px;
     cursor: pointer;
+    font-size: 12px;
+    margin-left: 8px;
+    vertical-align: middle;
 }
 
-.tag-filter-actions button:hover {
+.tag-action-button:hover {
     background: #ced4da;
 }
 
-#active-tags {
-    display: none;
-    flex-wrap: wrap;
-    gap: 8px;
-    margin-top: 12px;
-    padding: 8px;
-    background-color: #f8f9fa;
-    border-radius: 4px;
-}
-
-.active-tag {
-    display: flex;
-    align-items: center;
-    background-color: #0068B5;
-    color: white;
-    padding: 4px 8px;
-    border-radius: 12px;
-    font-size: 14px;
-}
-
 .remove-tag {
     background: none;
     border: none;
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 267c7f8142c2f..c852e50c71372 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -47,4 +47,5 @@ class Options:
     custom_results_dir = None
     build_jobs: int = multiprocessing.cpu_count()
 
+
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 429b24eb632c8..319e796a3831d 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -17,13 +17,13 @@ def generate_html(
     metadata: dict[str, BenchmarkMetadata],
 ):
     benchmark_runs.sort(key=lambda run: run.date, reverse=True)
-    
+
     # Create the comprehensive output object
     output = BenchmarkOutput(
         runs=benchmark_runs,
         metadata=metadata,
         tags=benchmark_tags_dict,
-        default_compare_names=compare_names
+        default_compare_names=compare_names,
     )
 
     if options.output_html == "local":
@@ -37,7 +37,7 @@ def generate_html(
             f.write("benchmarkMetadata = ")
             json.dump(json.loads(output.to_json())["metadata"], f, indent=2)
             f.write(";\n\n")
-            
+
             f.write("benchmarkTags = ")
             json.dump(json.loads(output.to_json())["tags"], f, indent=2)
             f.write(";\n\n")
diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
index 85271726e715c..e617168f37a76 100644
--- a/devops/scripts/benchmarks/utils/compute_runtime.py
+++ b/devops/scripts/benchmarks/utils/compute_runtime.py
@@ -143,7 +143,10 @@ def build_igc(self, repo, commit):
         run(configure_command)
 
         # set timeout to 2h. IGC takes A LONG time to build if building from scratch.
-        run(f"cmake --build {self.igc_build} -j {options.build_jobs}", timeout=60 * 60 * 2)
+        run(
+            f"cmake --build {self.igc_build} -j {options.build_jobs}",
+            timeout=60 * 60 * 2,
+        )
         # cmake --install doesn't work...
         run("make install", cwd=self.igc_build)
         return self.igc_install

From a0d8370e5011ecb62dc31b7c82542d3e979429d8 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 15:00:10 +0100
Subject: [PATCH 62/79] fix fetching tags from remote json

---
 devops/scripts/benchmarks/html/scripts.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index fbfb496533194..e09b420e95f21 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -789,6 +789,7 @@ function loadData() {
             .then(data => {
                 benchmarkRuns = data.runs || data;
                 benchmarkMetadata = data.metadata || benchmarkMetadata || {};
+                benchmarkTags = data.tags || benchmarkTags || {};
                 initializeCharts();
             })
             .catch(error => {
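
This one-line fix matters because the remote dashboard loads everything from a single
data.json. A rough sketch of the expected payload shape, with the top-level keys taken
from the BenchmarkOutput fields serialized by output_html.py and the
data.runs/data.metadata/data.tags accesses above, and every value an invented
placeholder:

    # Invented placeholder values; only the key names are taken from the diffs.
    example_remote_payload = {
        "runs": [{"name": "baseline_run", "date": "2025-03-20T12:00:00"}],
        "metadata": {"SubmitKernel": {"type": "group", "tags": ["submit", "micro"]}},
        "tags": {"submit": {"name": "submit",
                            "description": "Tests kernel submission performance"}},
        "default_compare_names": ["baseline_run"],
    }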

From c7f8d1084c95af7a8fa2406a666d7b29a9ad6553 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 16:06:00 +0100
Subject: [PATCH 63/79] fix results w/ descriptions and add url/commit of
 benchmarks

---
 devops/scripts/benchmarks/benches/compute.py   | 13 ++++++++++---
 devops/scripts/benchmarks/benches/llamacpp.py  | 13 ++++++++++---
 devops/scripts/benchmarks/benches/syclbench.py | 12 ++++++++++--
 devops/scripts/benchmarks/benches/test.py      |  1 -
 devops/scripts/benchmarks/benches/velocity.py  | 13 ++++++++++---
 devops/scripts/benchmarks/utils/result.py      |  3 ++-
 6 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index cd4ab7cd9b26c..0646aa500450a 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -42,6 +42,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "Compute Benchmarks"
 
+    def git_url(self) -> str:
+        return "https://github.com/intel/compute-benchmarks.git"
+
+    def git_hash(self) -> str:
+        return "b5cc46acf61766ab00da04e85bd4da4f7591eb21"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -49,8 +55,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "compute-benchmarks-repo",
-            "https://github.com/intel/compute-benchmarks.git",
-            "b5cc46acf61766ab00da04e85bd4da4f7591eb21",
+            self.git_url(),
+            self.git_hash(),
         )
         build_path = create_build_path(self.directory, "compute-benchmarks-build")
 
@@ -237,7 +243,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
-                    description=self.description(),
+                    git_url=self.git_url(),
+                    git_hash=self.git_hash(),
                 )
             )
         return ret
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 19af2498a0a63..33ffd1f11eabd 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -25,6 +25,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "llama.cpp bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/ggerganov/llama.cpp"
+
+    def git_hash(self) -> str:
+        return "1ee9eea094fe5846c7d8d770aa7caa749d246b23"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -32,8 +38,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "llamacpp-repo",
-            "https://github.com/ggerganov/llama.cpp",
-            "1ee9eea094fe5846c7d8d770aa7caa749d246b23",
+            self.git_url(),
+            self.git_hash(),
         )
 
         self.models_dir = os.path.join(self.directory, "models")
@@ -142,7 +148,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
-                    description=self.description(),
+                    git_url=self.git_url(),
+                    git_hash=self.git_hash(),
                 )
             )
         return results
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index f1e366aa5bc4b..0d924f7427ef0 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -23,6 +23,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "SYCL-Bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/unisa-hpc/sycl-bench.git"
+
+    def git_hash(self) -> str:
+        return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -31,8 +37,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "sycl-bench-repo",
-            "https://github.com/unisa-hpc/sycl-bench.git",
-            "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b",
+            self.git_url(),
+            self.git_hash(),
         )
 
         configure_command = [
@@ -159,6 +165,8 @@ def run(self, env_vars) -> list[Result]:
                             env=env_vars,
                             stdout=row,
                             unit="ms",
+                            git_url=self.git_url(),
+                            git_hash=self.git_hash(),
                         )
                     )
 
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 7afdd803b5cc3..ad1e8c9e57735 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -99,7 +99,6 @@ def run(self, env_vars) -> list[Result]:
                 env={"A": "B"},
                 stdout="no output",
                 unit="ms",
-                description=self.description(),
             )
         ]
 
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 0e1f20999c731..4db6a87a97325 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -26,6 +26,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "Velocity Bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/oneapi-src/Velocity-Bench/"
+
+    def git_hash(self) -> str:
+        return "b22215c16f789100449c34bf4eaa3fb178983d69"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -33,8 +39,8 @@ def setup(self):
         self.repo_path = git_clone(
             self.directory,
             "velocity-bench-repo",
-            "https://github.com/oneapi-src/Velocity-Bench/",
-            "b22215c16f789100449c34bf4eaa3fb178983d69",
+            self.git_url(),
+            self.git_hash(),
         )
 
     def benchmarks(self) -> list[Benchmark]:
@@ -139,7 +145,8 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
-                description=self.description(),
+                git_url=self.git_url(),
+                git_hash=self.git_hash(),
             )
         ]
 
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 82fc7ca1fddc2..b9ebfdcb60952 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -27,7 +27,8 @@ class Result:
     name: str = ""
     lower_is_better: bool = True
     suite: str = "Unknown"
-
+    git_url: str = ""
+    git_hash: str = ""
 
 @dataclass_json
 @dataclass
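
The intent of this patch is provenance: each Result now records the exact upstream
repository and commit it was produced from, so dashboard numbers stay attributable
when a suite is bumped. A minimal sketch of the pattern, with the class name, URL and
commit below being placeholders rather than code from the series:

    # Placeholder names throughout -- this only mirrors the pattern added above.
    class ExampleSuite:
        def git_url(self) -> str:
            return "https://github.com/example/example-bench.git"  # placeholder

        def git_hash(self) -> str:
            return "0000000000000000000000000000000000000000"  # placeholder

        def result_fields(self, label: str, value: float) -> dict:
            # The real suites pass these as Result(..., git_url=..., git_hash=...).
            return {
                "label": label,
                "value": value,
                "git_url": self.git_url(),
                "git_hash": self.git_hash(),
            }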

From 1dad51339a1f6684aa82c2023c7718bbf74c0be0 Mon Sep 17 00:00:00 2001
From: pbalcer <piotr.balcer@intel.com>
Date: Thu, 20 Mar 2025 16:37:23 +0100
Subject: [PATCH 64/79] fix git repo/hash for benchmarks

---
 devops/scripts/benchmarks/benches/compute.py   | 4 ++--
 devops/scripts/benchmarks/benches/llamacpp.py  | 4 ++--
 devops/scripts/benchmarks/benches/syclbench.py | 4 ++--
 devops/scripts/benchmarks/benches/velocity.py  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 0646aa500450a..d83a0d081af57 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -243,8 +243,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
-                    git_url=self.git_url(),
-                    git_hash=self.git_hash(),
+                    git_url=self.bench.git_url(),
+                    git_hash=self.bench.git_hash(),
                 )
             )
         return ret
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 33ffd1f11eabd..86d41ed525292 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -148,8 +148,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
-                    git_url=self.git_url(),
-                    git_hash=self.git_hash(),
+                    git_url=self.bench.git_url(),
+                    git_hash=self.bench.git_hash(),
                 )
             )
         return results
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index 0d924f7427ef0..9854c92d338fc 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -165,8 +165,8 @@ def run(self, env_vars) -> list[Result]:
                             env=env_vars,
                             stdout=row,
                             unit="ms",
-                            git_url=self.git_url(),
-                            git_hash=self.git_hash(),
+                            git_url=self.bench.git_url(),
+                            git_hash=self.bench.git_hash(),
                         )
                     )
 
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index 4db6a87a97325..493298dea8b10 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -145,8 +145,8 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
-                git_url=self.git_url(),
-                git_hash=self.git_hash(),
+                git_url=self.vb.git_url(),
+                git_hash=self.vb.git_hash(),
             )
         ]
 

From 2dbf3503a8af2bcb8f9702e8ec26a18ea433ca0e Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 13:22:39 -0700
Subject: [PATCH 65/79] Revert changes to workflow files

---
 .github/workflows/sycl-docs.yml              |   6 -
 .github/workflows/sycl-linux-run-tests.yml   |  22 --
 .github/workflows/ur-benchmarks-reusable.yml | 220 +------------------
 .github/workflows/ur-benchmarks.yml          |  55 +----
 .github/workflows/ur-build-hw.yml            |   2 +-
 5 files changed, 14 insertions(+), 291 deletions(-)

diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml
index 6b748ec9c7ebb..5c1e8e425111b 100644
--- a/.github/workflows/sycl-docs.yml
+++ b/.github/workflows/sycl-docs.yml
@@ -49,13 +49,7 @@ jobs:
         mkdir clang
         mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* .
         mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/
-        cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks
         touch .nojekyll
-        # Update benchmarking dashboard configuration
-        cat << 'EOF' > benchmarks/config.js
-        remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json';
-        defaultCompareNames = ["Baseline_PVC_L0"];
-        EOF 
     # Upload the generated docs as an artifact and deploy to GitHub Pages.
     - name: Upload artifact
       uses: actions/upload-pages-artifact@v3
diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index 3a93c2aae254c..2f3c02bf334ed 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -114,15 +114,6 @@ on:
         default: ''
         required: False
 
-      benchmark_upload_results:
-        type: string
-        default: 'false'
-        required: False
-      benchmark_build_hash:
-        type: string
-        default: ''
-        required: False
-
   workflow_dispatch:
     inputs:
       runner:
@@ -135,7 +126,6 @@ on:
           - '["cts-cpu"]'
           - '["Linux", "build"]'
           - '["cuda"]'
-          - '["Linux", "bmg"]'
           - '["PVC_PERF"]'
       image:
         type: choice
@@ -164,7 +154,6 @@ on:
           - e2e
           - cts
           - compute-benchmarks
-          - benchmark_v2
 
       env:
         description: |
@@ -348,14 +337,3 @@ jobs:
       env:
         RUNNER_TAG: ${{ inputs.runner }}
         GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
-
-    - name: Run benchmarks
-      if: inputs.tests_selector == 'benchmark_v2'
-      uses: ./devops/actions/run-tests/benchmark_v2
-      with:
-        target_devices: ${{ inputs.target_devices }}
-        upload_results: ${{ inputs.benchmark_upload_results }}
-        build_hash: ${{ inputs.benchmark_build_hash }}
-      env:
-        RUNNER_TAG: ${{ inputs.runner }}
-        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml
index d7c32edfdfc2a..66ffcecd70314 100644
--- a/.github/workflows/ur-benchmarks-reusable.yml
+++ b/.github/workflows/ur-benchmarks-reusable.yml
@@ -1,220 +1,12 @@
 name: Benchmarks Reusable
 
-on:
-  workflow_call:
-    inputs:
-      str_name:
-        required: true
-        type: string
-      pr_no:
-        required: true
-        # even though this is a number, this is a workaround for issues with
-        # reusable workflow calls that result in "Unexpected value '0'" error.
-        type: string
-      bench_script_params:
-        required: false
-        type: string
-        default: ''
-      sycl_config_params:
-        required: false
-        type: string
-        default: ''
-      upload_report:
-        required: false
-        type: boolean
-        default: false
-      compute_runtime_commit:
-        required: false
-        type: string
-        default: ''
+# This workflow is a WIP: this file acts as a placeholder.
 
-permissions:
-  contents: read
-  pull-requests: write
+on: [ workflow_call ]
 
 jobs:
-  bench-run:
-    name: Build SYCL, Run Benchmarks
-    strategy:
-      matrix:
-        adapter: [
-          {str_name: "${{ inputs.str_name }}",
-          sycl_config: "${{ inputs.sycl_config_params }}"
-          }
-        ]
-        build_type: [Release]
-        compiler: [{c: clang, cxx: clang++}]
-
-    runs-on: "PVC_PERF"
-
+  do-nothing:
+    runs-on: ubuntu-latest
     steps:
-    - name: Add comment to PR
-      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-      if: ${{ always() && inputs.pr_no != 0 }}
-      with:
-        script: |
-          const pr_no = '${{ inputs.pr_no }}';
-          const adapter = '${{ matrix.adapter.str_name }}';
-          const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
-          const params = '${{ inputs.bench_script_params }}';
-          const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}`;
-
-          github.rest.issues.createComment({
-            issue_number: pr_no,
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            body: body
-          })
-
-    - name: Checkout SYCL
-      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
-      with:
-        path: sycl-repo
-
-    # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged.
-    - name: Fetch PR's merge commit
-      if: ${{ inputs.pr_no != 0 }}
-      working-directory: ${{github.workspace}}/sycl-repo
-      run: |
-        git fetch -- https://github.com/${{github.repository}} +refs/pull/${{ inputs.pr_no }}/*:refs/remotes/origin/pr/${{ inputs.pr_no }}/*
-        git checkout origin/pr/${{ inputs.pr_no }}/merge
-        git rev-parse origin/pr/${{ inputs.pr_no }}/merge
-
-    # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location
-    - name: Establish bench scripts location
-      run: |
-        if [ -d "${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" ]; then
-          echo "Bench scripts are in devops/scripts"
-          echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" >> $GITHUB_ENV
-        elif [ -d "${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" ]; then
-          echo "Bench scripts are in unified-runtime/scripts"
-          echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" >> $GITHUB_ENV
-        else
-          echo "Bench scripts are absent...?"
-          exit 1
-        fi
-
-    - name: Create virtual environment
-      run: python -m venv .venv
-
-    - name: Activate virtual environment and install pip packages
-      run: |
-        source .venv/bin/activate
-        pip install -r ${BENCH_SCRIPTS_DIR}/requirements.txt
-
-    - name: Configure SYCL
-      run: >
-        python3 sycl-repo/buildbot/configure.py
-        -t ${{matrix.build_type}}
-        -o ${{github.workspace}}/sycl_build
-        --cmake-gen "Ninja"
-        --cmake-opt="-DLLVM_INSTALL_UTILS=ON"
-        --cmake-opt="-DSYCL_PI_TESTS=OFF"
-        --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
-        --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-        ${{matrix.adapter.sycl_config}}
-
-    - name: Build SYCL
-      run: cmake --build ${{github.workspace}}/sycl_build -j $(nproc)
-
-    # We need a complete installed UR for compute-benchmarks.
-    - name: Configure UR
-      run: >
-        cmake -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
-        -S${{github.workspace}}/sycl-repo/unified-runtime
-        -B${{github.workspace}}/ur_build
-        -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/ur_install
-        -DUR_BUILD_TESTS=OFF
-        -DUR_BUILD_ADAPTER_L0=ON
-        -DUR_BUILD_ADAPTER_L0_V2=ON
-        -DUMF_DISABLE_HWLOC=ON
-
-    - name: Build UR
-      run: cmake --build ${{github.workspace}}/ur_build -j $(nproc)
-
-    - name: Install UR
-      run: cmake --install ${{github.workspace}}/ur_build
-
-    - name: Compute core range
-      run: |
-        # Compute the core range for the first NUMA node; second node is for UMF jobs.
-        # Skip the first 4 cores - the kernel is likely to schedule more work on these.
-        CORES="$(lscpu | awk '
-          /NUMA node0 CPU|On-line CPU/ {line=$0}
-          END {
-            split(line, a, " ")
-            split(a[4], b, ",")
-            sub(/^0/, "4", b[1])
-            print b[1]
-          }')"
-        echo "Selected core: $CORES"
-        echo "CORES=$CORES" >> $GITHUB_ENV
-
-        ZE_AFFINITY_MASK=0
-        echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
-
-    - name: Run benchmarks
-      working-directory: ${{ github.workspace }}
-      id: benchmarks
-      run: >
-        source .venv/bin/activate &&
-        taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py
-        ~/llvm_bench_workdir
-        --sycl ${{ github.workspace }}/sycl_build
-        --ur ${{ github.workspace }}/ur_install
-        --adapter ${{ matrix.adapter.str_name }}
-        --compare baseline
-        --compute-runtime ${{ inputs.compute_runtime_commit }}
-        --build-igc
-        ${{ inputs.upload_report && '--output-html' || '' }}
-        ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
-        ${{ inputs.bench_script_params }}
-
-    - name: Print benchmark results
-      run: |
-        cat ${{ github.workspace }}/benchmark_results.md || true
-
-    - name: Add comment to PR
-      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
-      if: ${{ always() && inputs.pr_no != 0 }}
-      with:
-        script: |
-          let markdown = ""
-          try {
-            const fs = require('fs');
-            markdown = fs.readFileSync('benchmark_results.md', 'utf8');
-          } catch(err) {
-          }
-
-          const pr_no = '${{ inputs.pr_no }}';
-          const adapter = '${{ matrix.adapter.str_name }}';
-          const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
-          const test_status = '${{ steps.benchmarks.outcome }}';
-          const job_status = '${{ job.status }}';
-          const params = '${{ inputs.bench_script_params }}';
-          const body = `Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`;
-
-          github.rest.issues.createComment({
-            issue_number: pr_no,
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            body: body
-          })
-
-    - name: Rename benchmark results file
-      if: ${{ always() && inputs.upload_report }}
-      run: mv benchmark_results.html benchmark_results_${{ inputs.pr_no }}.html
-
-    - name: Upload HTML report
-      if: ${{ always() && inputs.upload_report }}
-      uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
-      with:
-        path: benchmark_results_${{ inputs.pr_no }}.html
-        key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }}
-
-    # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location
-    - name: Get information about platform
-      if: ${{ always() }}
-      run: |
-        ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh || true
-        ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh || true
+      - run: echo 'This workflow is a WIP.'
+  
diff --git a/.github/workflows/ur-benchmarks.yml b/.github/workflows/ur-benchmarks.yml
index cde4bfa828d71..23fbb1ad903b4 100644
--- a/.github/workflows/ur-benchmarks.yml
+++ b/.github/workflows/ur-benchmarks.yml
@@ -1,53 +1,12 @@
 name: Benchmarks
 
-on:
-  workflow_dispatch:
-    inputs:
-      str_name:
-        description: Adapter
-        type: choice
-        required: true
-        default: 'level_zero'
-        options:
-          - level_zero
-          - level_zero_v2
-      pr_no:
-        description: PR number (0 is sycl main branch)
-        type: number
-        required: true
-      bench_script_params:
-        description: Benchmark script arguments
-        type: string
-        required: false
-        default: ''
-      sycl_config_params:
-        description: Extra params for SYCL configuration
-        type: string
-        required: false
-        default: ''
-      compute_runtime_commit:
-        description: 'Compute Runtime commit'
-        type: string
-        required: false
-        default: ''
-      upload_report:
-        description: 'Upload HTML report'
-        type: boolean
-        required: false
-        default: false
+# This workflow is a WIP: this file acts as a placeholder.
 
-permissions:
-  contents: read
-  pull-requests: write
+on: [ workflow_dispatch ]
 
 jobs:
-  manual:
-    name: Compute Benchmarks
-    uses: ./.github/workflows/ur-benchmarks-reusable.yml
-    with:
-      str_name: ${{ inputs.str_name }}
-      pr_no: ${{ inputs.pr_no }}
-      bench_script_params: ${{ inputs.bench_script_params }}
-      sycl_config_params: ${{ inputs.sycl_config_params }}
-      compute_runtime_commit: ${{ inputs.compute_runtime_commit }}
-      upload_report: ${{ inputs.upload_report }}
+  do-nothing:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo 'This workflow is a WIP.'
+
diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml
index eebac4e424a4b..a0f94ab10f538 100644
--- a/.github/workflows/ur-build-hw.yml
+++ b/.github/workflows/ur-build-hw.yml
@@ -156,4 +156,4 @@ jobs:
 
     - name: Get information about platform
       if: ${{ always() }}
-      run: ${{github.workspace}}/devops/scripts/get_system_info.sh
+      run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh

From a4a9907fa88448c33c53bdd55818a63731a9ac13 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 13:42:27 -0700
Subject: [PATCH 66/79] Revert changes to composite actions

---
 devops/actions/run-tests/benchmark/action.yml |   1 +
 .../actions/run-tests/benchmark_v2/action.yml | 135 ------------------
 2 files changed, 1 insertion(+), 135 deletions(-)
 delete mode 100644 devops/actions/run-tests/benchmark_v2/action.yml

diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index 03b7d4ad776fd..e357e2bddec30 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -95,6 +95,7 @@ runs:
     if: always()
     shell: bash
     run: |
+      # TODO -- waiting on security clearance
       # Load configuration values
       $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
 
diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml
deleted file mode 100644
index bab571ec16ff2..0000000000000
--- a/devops/actions/run-tests/benchmark_v2/action.yml
+++ /dev/null
@@ -1,135 +0,0 @@
-name: 'Run benchmarks'
-
-# This action assumes the following prerequisites:
-#
-# - SYCL is placed in ./toolchain -- TODO change this
-# - /devops has been checked out in ./devops.
-# - env.GITHUB_TOKEN was properly set, because according to Github, that's
-#   apparently the recommended way to pass a secret into a github action:
-
-#   https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
-#
-# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently,
-#   only specific runners are fully supported.
-
-inputs:
-  target_devices:
-    type: string
-    required: True
-  upload_results:
-    type: string
-    required: True
-  build_hash:
-    type: string
-    required: False
-    default: ''
-
-runs:
-  using: "composite"
-  steps:
-  - name: Check specified runner type / target backend
-    shell: bash
-    env:
-      TARGET_DEVICE: ${{ inputs.target_devices }}
-      RUNNER_NAME: ${{ runner.name }}
-    run: |
-      case "$RUNNER_TAG" in
-        '["PVC_PERF"]' ) ;;
-        *)
-          echo "#"
-          echo "# WARNING: Only specific tuned runners are fully supported."
-          echo "# This workflow is not guaranteed to work with other runners."
-          echo "#" ;;
-      esac
-
-      # Ensure runner name has nothing injected
-      # TODO: in terms of security, is this overkill?
-      if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
-          echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
-          exit 1
-      fi
-      echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV 
-
-      # input.target_devices is not directly used, as this allows code injection
-      case "$TARGET_DEVICE" in
-        level_zero:*) ;;
-        *)
-          echo "#"
-          echo "# WARNING: Only level_zero backend is fully supported."
-          echo "# This workflow is not guaranteed to work with other backends."
-          echo "#" ;;
-      esac
-      echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV 
-
-  - name: Compute CPU core range to run benchmarks on
-    shell: bash
-    run: |
-      # Compute the core range for the first NUMA node; second node is used by
-      # UMF. Skip the first 4 cores as the kernel is likely to schedule more
-      # work on these.
-      CORES="$(lscpu | awk '
-        /NUMA node0 CPU|On-line CPU/ {line=$0}
-        END {
-          split(line, a, " ")
-          split(a[4], b, ",")
-          sub(/^0/, "4", b[1])
-          print b[1]
-        }')"
-      echo "CPU core range to use: $CORES"
-      echo "CORES=$CORES" >> $GITHUB_ENV
-
-      ZE_AFFINITY_MASK=0
-      echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
-  - name: Checkout results repo
-    shell: bash
-    run: |
-      git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
-  - name: Run compute-benchmarks
-    env:
-      BUILD_HASH: ${{ inputs.build_hash }}
-    shell: bash
-    run: |
-      # TODO generate summary + display helpful message here
-      export CMPLR_ROOT=./toolchain
-      echo "-----"
-      sycl-ls
-      echo "-----"
-      pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-      echo "-----"
-      mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
-
-      # TODO accommodate different GPUs and backends
-      SAVE_NAME="Baseline_PVC_L0"
-      if [ -n "$BUILD_HASH" ]; then
-          SAVE_NAME="Commit_PVC_$BUILD_HASH"
-      fi
-
-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
-        "$(realpath ./llvm_test_workdir)" \
-        --sycl "$(realpath ./toolchain)" \
-        --save "$SAVE_NAME" \
-        --output-html remote \
-        --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
-        --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
-        --preset Minimal
-      echo "-----"
-  - name: Push compute-benchmarks results
-    if: inputs.upload_results == 'true' && always()
-    shell: bash
-    run: |
-      # TODO redo configuration
-      # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
-
-      cd "./llvm-ci-perf-results"
-      git config user.name "SYCL Benchmarking Bot"
-      git config user.email "sys_sycl_benchmarks@intel.com"
-      git pull
-      git add .
-      # Make sure changes have been made
-      if git diff --quiet && git diff --cached --quiet; then
-        echo "No new results added, skipping push."
-      else
-        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-        git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
-      fi
-
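For reference, the "Compute CPU core range" step in the action deleted above reduces to the following rough bash sketch. It assumes a hypothetical `lscpu` line whose first NUMA range starts at core 0; actual output differs per machine.

```bash
# Illustrative only: mirrors the awk pipeline in the deleted step.
# Hypothetical lscpu output line; real values vary per machine.
line="NUMA node0 CPU(s):   0-55,112-167"
first_range="${line##* }"          # last field of the line   -> "0-55,112-167"
first_range="${first_range%%,*}"   # first NUMA node range    -> "0-55"
echo "CORES=4-${first_range#0-}"   # skip the first 4 cores   -> "CORES=4-55"
```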

From bdef08b60513462a2ab4bd35b1002f7ffb84f3f4 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 13:44:07 -0700
Subject: [PATCH 67/79] Revert changes to get_system_info.sh

---
 {devops => unified-runtime/.github}/scripts/get_system_info.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {devops => unified-runtime/.github}/scripts/get_system_info.sh (100%)

diff --git a/devops/scripts/get_system_info.sh b/unified-runtime/.github/scripts/get_system_info.sh
similarity index 100%
rename from devops/scripts/get_system_info.sh
rename to unified-runtime/.github/scripts/get_system_info.sh

From 9e51c86b381500bbcae4f9587ef821708ef16752 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 17:58:02 -0700
Subject: [PATCH 68/79] Revert changes not related to metadata

---
 devops/scripts/benchmarks/README.md           |   8 +-
 .../benchmark_results.html.template           | 192 +++++++++
 devops/scripts/benchmarks/history.py          |  57 ++-
 devops/scripts/benchmarks/main.py             |  84 +---
 devops/scripts/benchmarks/options.py          |  12 +-
 devops/scripts/benchmarks/output_html.py      | 378 +++++++++++++++---
 devops/scripts/benchmarks/output_markdown.py  |  44 +-
 devops/scripts/benchmarks/requirements.txt    |   1 -
 8 files changed, 592 insertions(+), 184 deletions(-)
 create mode 100644 devops/scripts/benchmarks/benchmark_results.html.template

diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
index fcadded3cad51..004fe14eca35b 100644
--- a/devops/scripts/benchmarks/README.md
+++ b/devops/scripts/benchmarks/README.md
@@ -6,8 +6,6 @@ Scripts for running performance tests on SYCL and Unified Runtime.
 
 - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench)
 - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/)
-- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp)
-- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench)
 
 ## Running
 
@@ -29,6 +27,8 @@ You can also include additional benchmark parameters, such as environment variab
 
 Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request.
 
+By default, all benchmark runs are compared against `baseline`, a set of recent results that is updated automatically by a nightly job.
+
 You must be a member of the `oneapi-src` organization to access these features.
 
 ## Comparing results
@@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option
 
 You can compare benchmark results using the `--compare` option. The comparison is presented in a markdown output file (see below). To calculate the relative performance of new results against previously saved data, use `--compare <previously_saved_data>` (e.g. `--compare baseline`). To compare only stored data without generating new results, use `--dry-run --compare <name1> --compare <name2> --relative-perf <name1>`, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script from running benchmarks. Listing more than two `--compare` options displays only execution time, without statistical analysis.
 
-Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results
-are stored [here](https://oneapi-src.github.io/unified-runtime/performance/).
+Baseline, as well as baseline-v2 (for the level-zero adapter v2), is updated automatically during a nightly job. The results
+are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).
 
 ## Output formats
 You can display the results as an HTML file by using `--output-html` and as a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. To obtain the full markdown output, use `--output-markdown full`.
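For reference, a minimal sketch of the comparison flow described above; the workdir and build paths are placeholders and `my_run` is a hypothetical saved-result name, so adjust to your setup:

```bash
# Run the benchmarks once and save the results under a name.
./main.py ~/llvm_bench_workdir --sycl ~/sycl_build --save my_run

# Later: compare stored data only (no new benchmark runs), using
# "baseline" as the reference for the relative-performance numbers.
./main.py ~/llvm_bench_workdir --dry-run \
  --compare baseline --compare my_run --relative-perf baseline \
  --output-markdown
```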
diff --git a/devops/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template
new file mode 100644
index 0000000000000..1deeedad66b00
--- /dev/null
+++ b/devops/scripts/benchmarks/benchmark_results.html.template
@@ -0,0 +1,192 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Benchmark Results</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            margin: 0;
+            padding: 16px;
+            background: #f8f9fa;
+        }
+        .container {
+            max-width: 1100px;
+            margin: 0 auto;
+        }
+        h1, h2 {
+            color: #212529;
+            text-align: center;
+            margin-bottom: 24px;
+            font-weight: 500;
+        }
+        .chart {
+            background: white;
+            border-radius: 8px;
+            padding: 24px;
+            margin-bottom: 24px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+            overflow-x: auto;
+        }
+        .chart > div {
+            min-width: 600px;
+            margin: 0 auto;
+        }
+        @media (max-width: 768px) {
+            body {
+                padding: 12px;
+            }
+            .chart {
+                padding: 16px;
+                border-radius: 6px;
+            }
+            h1 {
+                font-size: 24px;
+                margin-bottom: 16px;
+            }
+        }
+        .filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+        }
+        .filter-container input {
+            padding: 8px;
+            font-size: 16px;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            width: 400px;
+            max-width: 100%;
+        }
+        .suite-filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .suite-checkbox {
+            margin: 0 8px;
+        }
+        details {
+            margin-bottom: 24px;
+        }
+        summary {
+            font-size: 18px;
+            font-weight: 500;
+            cursor: pointer;
+            padding: 12px;
+            background: #e9ecef;
+            border-radius: 8px;
+            user-select: none;
+        }
+        summary:hover {
+            background: #dee2e6;
+        }
+    </style>
+    <script>
+        function getQueryParam(param) {
+            const urlParams = new URLSearchParams(window.location.search);
+            return urlParams.get(param);
+        }
+
+        function filterCharts() {
+            const regexInput = document.getElementById('bench-filter').value;
+            const regex = new RegExp(regexInput, 'i');
+            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
+            const charts = document.querySelectorAll('.chart');
+
+            charts.forEach(chart => {
+                const label = chart.getAttribute('data-label');
+                const suite = chart.getAttribute('data-suite');
+                if (regex.test(label) && activeSuites.includes(suite)) {
+                    chart.style.display = '';
+                } else {
+                    chart.style.display = 'none';
+                }
+            });
+
+            updateURL();
+        }
+
+        function updateURL() {
+            const url = new URL(window.location);
+            const regex = document.getElementById('bench-filter').value;
+            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
+
+            if (regex) {
+                url.searchParams.set('regex', regex);
+            } else {
+                url.searchParams.delete('regex');
+            }
+
+            if (activeSuites.length > 0) {
+                url.searchParams.set('suites', activeSuites.join(','));
+            } else {
+                url.searchParams.delete('suites');
+            }
+
+            history.replaceState(null, '', url);
+        }
+
+        document.addEventListener('DOMContentLoaded', (event) => {
+            const regexParam = getQueryParam('regex');
+            const suitesParam = getQueryParam('suites');
+
+            if (regexParam) {
+                document.getElementById('bench-filter').value = regexParam;
+            }
+
+            const suiteCheckboxes = document.querySelectorAll('.suite-checkbox');
+            if (suitesParam) {
+                const suites = suitesParam.split(',');
+                suiteCheckboxes.forEach(checkbox => {
+                    if (suites.includes(checkbox.getAttribute('data-suite'))) {
+                        checkbox.checked = true;
+                    } else {
+                        checkbox.checked = false;
+                    }
+                });
+            } else {
+                suiteCheckboxes.forEach(checkbox => {
+                    checkbox.checked = true;
+                });
+            }
+            filterCharts();
+
+            suiteCheckboxes.forEach(checkbox => {
+                checkbox.addEventListener('change', () => {
+                    filterCharts();
+                });
+            });
+
+            document.getElementById('bench-filter').addEventListener('input', () => {
+                filterCharts();
+            });
+        });
+    </script>
+</head>
+<body>
+    <div class="container">
+        <h1>Benchmark Results</h1>
+        <div class="filter-container">
+            <input type="text" id="bench-filter" placeholder="Regex...">
+        </div>
+        <div class="suite-filter-container">
+            ${suite_checkboxes_html}
+        </div>
+        <details class="timeseries">
+            <summary>Historical Results</summary>
+            <div class="charts">
+                ${timeseries_charts_html}
+            </div>
+        </details>
+        <details class="bar-charts">
+            <summary>Comparisons</summary>
+            <div class="charts">
+                ${bar_charts_html}
+            </div>
+        </details>
+    </div>
+</body>
+</html>
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 0b80c54ad7393..7902aa4f04c35 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024-2025 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -6,14 +6,14 @@
 import os
 import json
 from pathlib import Path
-import socket
-from utils.result import Result, BenchmarkRun
+from benches.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run
 
 
 class BenchmarkHistory:
+    benchmark_run_index_max = 0
     runs = []
 
     def __init__(self, dir):
@@ -35,55 +35,42 @@ def load(self, n: int):
         # Get all JSON files in the results directory
         benchmark_files = list(results_dir.glob("*.json"))
 
-        # Extract timestamp and sort files by it
-        def extract_timestamp(file_path: Path) -> str:
+        # Extract index numbers and sort files by index number
+        def extract_index(file_path: Path) -> int:
             try:
-                return file_path.stem.split("_")[-1]
-            except IndexError:
-                return ""
+                return int(file_path.stem.split("_")[0])
+            except (IndexError, ValueError):
+                return -1
 
-        benchmark_files.sort(key=extract_timestamp, reverse=True)
+        benchmark_files = [
+            file for file in benchmark_files if extract_index(file) != -1
+        ]
+        benchmark_files.sort(key=extract_index)
 
         # Load the first n benchmark files
         benchmark_runs = []
-        for file_path in benchmark_files[:n]:
+        for file_path in benchmark_files[n::-1]:
             benchmark_run = self.load_result(file_path)
             if benchmark_run:
                 benchmark_runs.append(benchmark_run)
 
+        if benchmark_files:
+            self.benchmark_run_index_max = extract_index(benchmark_files[-1])
+
         self.runs = benchmark_runs
 
     def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
-            script_dir = os.path.dirname(os.path.abspath(__file__))
-            result = run("git rev-parse --short HEAD", cwd=script_dir)
+            result = run("git rev-parse --short HEAD")
             git_hash = result.stdout.decode().strip()
-
-            # Get the GitHub repo URL from git remote
-            remote_result = run("git remote get-url origin", cwd=script_dir)
-            remote_url = remote_result.stdout.decode().strip()
-
-            # Convert SSH or HTTPS URL to owner/repo format
-            if remote_url.startswith("git@github.com:"):
-                # SSH format: git@github.com:owner/repo.git
-                github_repo = remote_url.split("git@github.com:")[1].rstrip(".git")
-            elif remote_url.startswith("https://github.com/"):
-                # HTTPS format: https://github.com/owner/repo.git
-                github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
-            else:
-                github_repo = None
-
         except:
             git_hash = "unknown"
-            github_repo = None
 
         return BenchmarkRun(
             name=name,
             git_hash=git_hash,
-            github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
-            hostname=socket.gethostname(),
         )
 
     def save(self, save_name, results: list[Result], to_file=True):
@@ -97,9 +84,12 @@ def save(self, save_name, results: list[Result], to_file=True):
         results_dir = Path(os.path.join(self.dir, "results"))
         os.makedirs(results_dir, exist_ok=True)
 
-        # Use formatted timestamp for the filename
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
+        self.benchmark_run_index_max += 1
+        file_path = Path(
+            os.path.join(
+                results_dir, f"{self.benchmark_run_index_max}_{save_name}.json"
+            )
+        )
         with file_path.open("w") as file:
             json.dump(serialized, file, indent=4)
         print(f"Benchmark results saved to {file_path}")
@@ -130,7 +120,6 @@ def compute_average(self, data: list[BenchmarkRun]):
             name=first_run.name,
             git_hash="average",
             date=first_run.date,  # should this be different?
-            hostname=first_run.hostname,
         )
 
         return average_benchmark_run
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 14e5fe1a04624..620c72b878137 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,7 +17,6 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
-from presets import enabled_suites, presets
 
 import argparse
 import re
@@ -179,9 +178,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     failures = {}
 
     for s in suites:
-        if s.name() not in enabled_suites(options.preset):
-            continue
-
         suite_benchmarks = s.benchmarks()
         if filter:
             suite_benchmarks = [
@@ -257,10 +253,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    results_dir = directory
-    if options.custom_results_dir:
-        results_dir = Path(options.custom_results_dir)
-    history = BenchmarkHistory(results_dir)
+    history = BenchmarkHistory(directory)
     # limit how many files we load.
     # should this be configurable?
     history.load(1000)
@@ -277,18 +270,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
 
     if options.output_markdown:
         markdown_content = generate_markdown(
-            this_name, chart_data, failures, options.output_markdown
+            this_name, chart_data, options.output_markdown
         )
 
-        md_path = options.output_directory
-        if options.output_directory is None:
-            md_path = os.getcwd()
-
-        with open(os.path.join(md_path, "benchmark_results.md"), "w") as file:
+        with open("benchmark_results.md", "w") as file:
             file.write(markdown_content)
 
         print(
-            f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md"
+            f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md"
         )
 
     saved_name = save_name if save_name is not None else this_name
@@ -302,10 +291,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             compare_names.append(saved_name)
 
     if options.output_html:
-        html_path = options.output_directory
-        if options.output_directory is None:
-            html_path = os.path.join(os.path.dirname(__file__), "html")
-        generate_html(history.runs, compare_names, html_path, metadata)
+        html_content = generate_html(history.runs, "intel/llvm", compare_names)
+
+        with open("benchmark_results.html", "w") as file:
+            file.write(html_content)
+
+        print(
+            f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html"
+        )
 
 
 def validate_and_parse_env_args(env_args):
@@ -388,6 +381,12 @@ def validate_and_parse_env_args(env_args):
         help="Regex pattern to filter benchmarks by name.",
         default=None,
     )
+    parser.add_argument(
+        "--epsilon",
+        type=float,
+        help="Threshold to consider change of performance significant",
+        default=options.epsilon,
+    )
     parser.add_argument(
         "--verbose", help="Print output of all the commands.", action="store_true"
     )
@@ -414,17 +413,7 @@ def validate_and_parse_env_args(env_args):
         help="Specify whether markdown output should fit the content size limit for request validation",
     )
     parser.add_argument(
-        "--output-html",
-        help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.",
-        nargs="?",
-        const=options.output_html,
-        choices=["local", "remote"],
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=str,
-        help="Location for output files, if --output-html or --output_markdown was specified.",
-        default=None,
+        "--output-html", help="Create HTML output", action="store_true", default=False
     )
     parser.add_argument(
         "--dry-run",
@@ -456,31 +445,7 @@ def validate_and_parse_env_args(env_args):
         help="The name of the results which should be used as a baseline for metrics calculation",
         default=options.current_run_name,
     )
-    parser.add_argument(
-        "--cudnn_directory",
-        type=str,
-        help="Directory for cudnn library",
-        default=None,
-    )
-    parser.add_argument(
-        "--cublas_directory",
-        type=str,
-        help="Directory for cublas library",
-        default=None,
-    )
-    parser.add_argument(
-        "--preset",
-        type=str,
-        choices=[p for p in presets.keys()],
-        help="Benchmark preset to run.",
-        default=options.preset,
-    )
-    parser.add_argument(
-        "--results-dir",
-        type=str,
-        help="Specify a custom results directory",
-        default=options.custom_results_dir,
-    )
+
     parser.add_argument(
         "--build-jobs",
         type=int,
@@ -498,6 +463,7 @@ def validate_and_parse_env_args(env_args):
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
+    options.epsilon = args.epsilon
     options.ur = args.ur
     options.ur_adapter = args.adapter
     options.exit_on_failure = args.exit_on_failure
@@ -510,10 +476,6 @@ def validate_and_parse_env_args(env_args):
     options.iterations_stddev = args.iterations_stddev
     options.build_igc = args.build_igc
     options.current_run_name = args.relative_perf
-    options.cudnn_directory = args.cudnn_directory
-    options.cublas_directory = args.cublas_directory
-    options.preset = args.preset
-    options.custom_results_dir = args.results_dir
     options.build_jobs = args.build_jobs
 
     if args.build_igc and args.compute_runtime is None:
@@ -521,10 +483,6 @@ def validate_and_parse_env_args(env_args):
     if args.compute_runtime is not None:
         options.build_compute_runtime = True
         options.compute_runtime_tag = args.compute_runtime
-    if args.output_dir is not None:
-        if not os.path.isdir(args.output_dir):
-            parser.error("Specified --output-dir is not a valid path")
-        options.output_directory = os.path.abspath(args.output_dir)
 
     benchmark_filter = re.compile(args.filter) if args.filter else None
 
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index c852e50c71372..7bbca93a6f4fc 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -2,8 +2,6 @@
 from enum import Enum
 import multiprocessing
 
-from presets import presets
-
 
 class Compare(Enum):
     LATEST = "latest"
@@ -32,20 +30,18 @@ class Options:
     compare: Compare = Compare.LATEST
     compare_max: int = 10  # average/median over how many results
     output_markdown: MarkdownSize = MarkdownSize.SHORT
-    output_html: str = "local"
-    output_directory: str = None
+    output_html: bool = False
     dry_run: bool = False
+    # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
+    epsilon: float = 0.02
     iterations_stddev: int = 5
     build_compute_runtime: bool = False
     extra_ld_libraries: list[str] = field(default_factory=list)
     extra_env_vars: dict = field(default_factory=dict)
-    compute_runtime_tag: str = "25.05.32567.18"
+    compute_runtime_tag: str = "25.05.32567.12"
     build_igc: bool = False
     current_run_name: str = "This PR"
-    preset: str = "Full"
-    custom_results_dir = None
     build_jobs: int = multiprocessing.cpu_count()
 
-
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 319e796a3831d..4ba395bc3aac6 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -1,58 +1,340 @@
-# Copyright (C) 2024-2025 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import json
+import re
 import os
-from options import options
-from utils.result import BenchmarkMetadata, BenchmarkOutput
-from benches.base import benchmark_tags, benchmark_tags_dict
+from pathlib import Path
+import matplotlib.pyplot as plt
+import mpld3
+from collections import defaultdict
+from dataclasses import dataclass
+import matplotlib.dates as mdates
+from benches.result import BenchmarkRun, Result
+import numpy as np
+from string import Template
+
+
+@dataclass
+class BenchmarkMetadata:
+    unit: str
+    suite: str
+    lower_is_better: bool
+
+
+@dataclass
+class BenchmarkSeries:
+    label: str
+    metadata: BenchmarkMetadata
+    runs: list[BenchmarkRun]
+
+
+@dataclass
+class BenchmarkChart:
+    label: str
+    suite: str
+    html: str
+
+
+def tooltip_css() -> str:
+    return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}"
+
+
+def create_time_series_chart(
+    benchmarks: list[BenchmarkSeries], github_repo: str
+) -> list[BenchmarkChart]:
+    plt.close("all")
+
+    num_benchmarks = len(benchmarks)
+    if num_benchmarks == 0:
+        return []
+
+    html_charts = []
+
+    for _, benchmark in enumerate(benchmarks):
+        fig, ax = plt.subplots(figsize=(10, 4))
+
+        all_values = []
+        all_stddevs = []
+
+        for run in benchmark.runs:
+            sorted_points = sorted(run.results, key=lambda x: x.date)
+            dates = [point.date for point in sorted_points]
+            values = [point.value for point in sorted_points]
+            stddevs = [point.stddev for point in sorted_points]
+
+            all_values.extend(values)
+            all_stddevs.extend(stddevs)
+
+            ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5)
+            scatter = ax.scatter(dates, values, picker=True)
+
+            tooltip_labels = [
+                f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
+                f"Value: {point.value:.2f} {benchmark.metadata.unit}\n"
+                f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n"
+                f"Git Hash: {point.git_hash}"
+                for point in sorted_points
+            ]
+
+            targets = [
+                f"https://github.com/{github_repo}/commit/{point.git_hash}"
+                for point in sorted_points
+            ]
+
+            tooltip = mpld3.plugins.PointHTMLTooltip(
+                scatter, tooltip_labels, css=tooltip_css(), targets=targets
+            )
+            mpld3.plugins.connect(fig, tooltip)
+
+        ax.set_title(benchmark.label, pad=20)
+        performance_indicator = (
+            "lower is better"
+            if benchmark.metadata.lower_is_better
+            else "higher is better"
+        )
+        ax.text(
+            0.5,
+            1.05,
+            f"({performance_indicator})",
+            ha="center",
+            transform=ax.transAxes,
+            style="italic",
+            fontsize=7,
+            color="#666666",
+        )
+
+        ax.set_xlabel("")
+        unit = benchmark.metadata.unit
+        ax.set_ylabel(f"Value ({unit})" if unit else "Value")
+        ax.grid(True, alpha=0.2)
+        ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
+        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d %H:%M:%S"))
+
+        plt.tight_layout()
+        html_charts.append(
+            BenchmarkChart(
+                html=mpld3.fig_to_html(fig),
+                label=benchmark.label,
+                suite=benchmark.metadata.suite,
+            )
+        )
+        plt.close(fig)
+
+    return html_charts
+
+
+@dataclass
+class ExplicitGroup:
+    name: str
+    nnames: int
+    metadata: BenchmarkMetadata
+    runs: dict[str, dict[str, Result]]
+
+
+def create_explicit_groups(
+    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
+) -> list[ExplicitGroup]:
+    groups = {}
+
+    for run in benchmark_runs:
+        if run.name in compare_names:
+            for res in run.results:
+                if res.explicit_group != "":
+                    if res.explicit_group not in groups:
+                        groups[res.explicit_group] = ExplicitGroup(
+                            name=res.explicit_group,
+                            nnames=len(compare_names),
+                            metadata=BenchmarkMetadata(
+                                unit=res.unit,
+                                lower_is_better=res.lower_is_better,
+                                suite=res.suite,
+                            ),
+                            runs={},
+                        )
+
+                    group = groups[res.explicit_group]
+                    if res.label not in group.runs:
+                        group.runs[res.label] = {name: None for name in compare_names}
+
+                    if group.runs[res.label][run.name] is None:
+                        group.runs[res.label][run.name] = res
+
+    return list(groups.values())
+
+
+def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
+    plt.close("all")
+
+    html_charts = []
+
+    for group in groups:
+        fig, ax = plt.subplots(figsize=(10, 6))
+
+        x = np.arange(group.nnames)
+        x_labels = []
+        width = 0.8 / len(group.runs)
+
+        max_height = 0
+
+        for i, (run_name, run_results) in enumerate(group.runs.items()):
+            offset = width * i
+
+            positions = x + offset
+            x_labels = run_results.keys()
+            valid_data = [r.value if r is not None else 0 for r in run_results.values()]
+            rects = ax.bar(positions, valid_data, width, label=run_name)
+            # This is a hack to disable all bar_label. Setting labels to empty doesn't work.
+            # We create our own labels below for each bar, this works better in mpld3.
+            ax.bar_label(rects, fmt="")
+
+            for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
+                if res is None:
+                    continue
+
+                height = rect.get_height()
+                if height > max_height:
+                    max_height = height
+
+                ax.text(
+                    rect.get_x() + rect.get_width() / 2.0,
+                    height + 1,
+                    f"{res.value:.1f}",
+                    ha="center",
+                    va="bottom",
+                    fontsize=9,
+                )
+
+                tooltip_labels = [
+                    f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
+                    f"Run: {run}\n"
+                    f"Label: {res.label}\n"
+                    f"Value: {res.value:.2f} {res.unit}\n"
+                    f"Stddev: {res.stddev:.2f} {res.unit}\n"
+                ]
+                tooltip = mpld3.plugins.LineHTMLTooltip(
+                    rect, tooltip_labels, css=tooltip_css()
+                )
+                mpld3.plugins.connect(ax.figure, tooltip)
+
+        # normally we'd just set legend to be outside
+        # the chart, but this is not supported by mpld3.
+        # instead, we adjust the y axis to account for
+        # the height of the bars.
+        legend_height = len(group.runs) * 0.1
+        ax.set_ylim(0, max_height * (1 + legend_height))
+
+        ax.set_xticks([])
+        ax.grid(True, axis="y", alpha=0.2)
+        ax.set_ylabel(f"Value ({group.metadata.unit})")
+        ax.legend(loc="upper left")
+        ax.set_title(group.name, pad=20)
+        performance_indicator = (
+            "lower is better" if group.metadata.lower_is_better else "higher is better"
+        )
+        ax.text(
+            0.5,
+            1.03,
+            f"({performance_indicator})",
+            ha="center",
+            transform=ax.transAxes,
+            style="italic",
+            fontsize=7,
+            color="#666666",
+        )
+
+        for idx, label in enumerate(x_labels):
+            # this is a hack to get labels to show above the legend
+            # we normalize the idx to transAxes transform and offset it a little.
+            x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (
+                ax.get_xlim()[1] - ax.get_xlim()[0]
+            )
+            ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666")
+
+        plt.tight_layout()
+        html_charts.append(
+            BenchmarkChart(
+                label=group.name,
+                html=mpld3.fig_to_html(fig),
+                suite=group.metadata.suite,
+            )
+        )
+        plt.close(fig)
+
+    return html_charts
+
+
+def process_benchmark_data(
+    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
+) -> list[BenchmarkSeries]:
+    benchmark_metadata: dict[str, BenchmarkMetadata] = {}
+    run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list))
+
+    for run in benchmark_runs:
+        if run.name not in compare_names:
+            continue
+
+        for result in run.results:
+            if result.label not in benchmark_metadata:
+                benchmark_metadata[result.label] = BenchmarkMetadata(
+                    unit=result.unit,
+                    lower_is_better=result.lower_is_better,
+                    suite=result.suite,
+                )
+
+            result.date = run.date
+            result.git_hash = run.git_hash
+            run_map[result.label][run.name].append(result)
+
+    benchmark_series = []
+    for label, metadata in benchmark_metadata.items():
+        runs = [
+            BenchmarkRun(name=run_name, results=results)
+            for run_name, results in run_map[label].items()
+        ]
+        benchmark_series.append(
+            BenchmarkSeries(label=label, metadata=metadata, runs=runs)
+        )
+
+    return benchmark_series
 
 
 def generate_html(
-    benchmark_runs: list,
-    compare_names: list[str],
-    html_path: str,
-    metadata: dict[str, BenchmarkMetadata],
-):
-    benchmark_runs.sort(key=lambda run: run.date, reverse=True)
-
-    # Create the comprehensive output object
-    output = BenchmarkOutput(
-        runs=benchmark_runs,
-        metadata=metadata,
-        tags=benchmark_tags_dict,
-        default_compare_names=compare_names,
+    benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]
+) -> str:
+    benchmarks = process_benchmark_data(benchmark_runs, compare_names)
+
+    timeseries = create_time_series_chart(benchmarks, github_repo)
+    timeseries_charts_html = "\n".join(
+        f'<div class="chart" data-label="{ts.label}" data-suite="{ts.suite}"><div>{ts.html}</div></div>'
+        for ts in timeseries
     )
 
-    if options.output_html == "local":
-        data_path = os.path.join(html_path, "data.js")
-        with open(data_path, "w") as f:
-            # For local format, we need to write JavaScript variable assignments
-            f.write("benchmarkRuns = ")
-            json.dump(json.loads(output.to_json())["runs"], f, indent=2)
-            f.write(";\n\n")
-
-            f.write("benchmarkMetadata = ")
-            json.dump(json.loads(output.to_json())["metadata"], f, indent=2)
-            f.write(";\n\n")
-
-            f.write("benchmarkTags = ")
-            json.dump(json.loads(output.to_json())["tags"], f, indent=2)
-            f.write(";\n\n")
-
-            f.write("defaultCompareNames = ")
-            json.dump(output.default_compare_names, f, indent=2)
-            f.write(";\n")
-
-        print(f"See {os.getcwd()}/html/index.html for the results.")
-    else:
-        # For remote format, we write a single JSON file
-        data_path = os.path.join(html_path, "data.json")
-        with open(data_path, "w") as f:
-            json.dump(json.loads(output.to_json()), f, indent=2)
-
-        print(
-            f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
-        )
+    explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
+
+    bar_charts = create_grouped_bar_charts(explicit_groups)
+    bar_charts_html = "\n".join(
+        f'<div class="chart" data-label="{bc.label}" data-suite="{bc.suite}"><div>{bc.html}</div></div>'
+        for bc in bar_charts
+    )
+
+    suite_names = {t.suite for t in timeseries}
+    suite_checkboxes_html = " ".join(
+        f'<label><input type="checkbox" class="suite-checkbox" data-suite="{suite}" checked> {suite}</label>'
+        for suite in suite_names
+    )
+
+    script_path = os.path.dirname(os.path.realpath(__file__))
+    results_template_path = Path(script_path, "benchmark_results.html.template")
+    with open(results_template_path, "r") as file:
+        html_template = file.read()
+
+    template = Template(html_template)
+    data = {
+        "suite_checkboxes_html": suite_checkboxes_html,
+        "timeseries_charts_html": timeseries_charts_html,
+        "bar_charts_html": bar_charts_html,
+    }
+
+    return template.substitute(data)
diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
index 3295968603d0c..dd6711cec6365 100644
--- a/devops/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections
-from utils.result import Result
+from benches.result import Result
 from options import options, MarkdownSize
 import ast
 
@@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int):
         "\n<details>\n"
         "<summary>\n"
         f"{title} {rows_count} "
-        f"(threshold {options.stddev_threshold*100:.2f}%)\n"
+        f"(threshold {options.epsilon*100:.2f}%)\n"
         "</summary>\n\n"
     )
 
@@ -138,6 +138,17 @@ def generate_markdown_details(
         env_dict = res.env
         command = res.command
 
+        # If data is collected from already saved results,
+        # the content is parsed as strings
+        if isinstance(res.env, str):
+            # Since these scripts are only used on data prepared by our
+            # own scripts, this should be safe; however, it may warrant
+            # additional review:
+            # https://docs.python.org/3/library/ast.html#ast.literal_eval
+            env_dict = ast.literal_eval(res.env)
+        if isinstance(res.command, str):
+            command = ast.literal_eval(res.command)
+
         section = (
             "\n<details>\n"
             f"<summary>{res.label}</summary>\n\n"
@@ -168,7 +179,7 @@ def generate_markdown_details(
             return "\nBenchmark details contain too many chars to display\n"
 
 
-def generate_summary_table(
+def generate_summary_table_and_chart(
     chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize
 ):
     summary_table = get_chart_markdown_header(
@@ -265,7 +276,7 @@ def generate_summary_table(
                 delta = oln.diff - 1
                 oln.row += f" {delta*100:.2f}%"
 
-                if abs(delta) > options.stddev_threshold:
+                if abs(delta) > options.epsilon:
                     if delta > 0:
                         improved_rows.append(oln.row + " | \n")
                     else:
@@ -363,27 +374,10 @@ def generate_summary_table(
                 return "\n# Summary\n" "Benchmark output is too large to display\n\n"
 
 
-def generate_failures_section(failures: dict[str, str]) -> str:
-    if not failures:
-        return ""
-
-    section = "\n# Failures\n"
-    section += "| Name | Failure |\n"
-    section += "|---|---|\n"
-
-    for name, failure in failures.items():
-        section += f"| {name} | {failure} |\n"
-
-    return section
-
-
 def generate_markdown(
-    name: str,
-    chart_data: dict[str, list[Result]],
-    failures: dict[str, str],
-    markdown_size: MarkdownSize,
+    name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize
 ):
-    (summary_line, summary_table) = generate_summary_table(
+    (summary_line, summary_table) = generate_summary_table_and_chart(
         chart_data, name, markdown_size
     )
 
@@ -402,6 +396,4 @@ def generate_markdown(
         )
         generated_markdown += "\n# Details\n" f"{markdown_details}\n"
 
-    failures_section = generate_failures_section(failures)
-
-    return failures_section + generated_markdown
+    return generated_markdown
diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
index 9f0381ceef6c2..99ba0caab55c2 100644
--- a/devops/scripts/benchmarks/requirements.txt
+++ b/devops/scripts/benchmarks/requirements.txt
@@ -2,4 +2,3 @@ matplotlib==3.9.2
 mpld3==0.5.10
 dataclasses-json==0.6.7
 PyYAML==6.0.1
-Mako==1.3.9
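
The string round-trip handled in generate_markdown_details above can be exercised
in isolation. A minimal sketch, assuming a hypothetical stand-in for the Result
fields involved (only env and command matter here):

    import ast

    class SavedResult:
        # Hypothetical stub; mirrors only the env/command fields used by the
        # markdown output.
        def __init__(self, env, command):
            self.env = env          # dict, or its repr() read back from saved results
            self.command = command  # list, or its repr() read back

    def parse_saved_fields(res):
        env_dict, command = res.env, res.command
        # Results reloaded from disk may carry these fields as strings;
        # literal_eval evaluates Python literals only, so it cannot run code.
        if isinstance(res.env, str):
            env_dict = ast.literal_eval(res.env)
        if isinstance(res.command, str):
            command = ast.literal_eval(res.command)
        return env_dict, command

    env, cmd = parse_saved_fields(
        SavedResult("{'ONEAPI_DEVICE_SELECTOR': 'level_zero:gpu'}",
                    "['./benchmark', '--iterations', '5']")
    )
    assert env["ONEAPI_DEVICE_SELECTOR"] == "level_zero:gpu"
    assert cmd[0] == "./benchmark"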

From 5cc02c544fee20568753192fcaa30d4fff22b719 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 18:14:10 -0700
Subject: [PATCH 69/79] Revert changes to html

---
 devops/scripts/benchmarks/html/config.js  |   2 -
 devops/scripts/benchmarks/html/data.js    |   3 -
 devops/scripts/benchmarks/html/index.html |  78 ---
 devops/scripts/benchmarks/html/scripts.js | 812 ----------------------
 devops/scripts/benchmarks/html/styles.css | 357 ----------
 5 files changed, 1252 deletions(-)
 delete mode 100644 devops/scripts/benchmarks/html/config.js
 delete mode 100644 devops/scripts/benchmarks/html/data.js
 delete mode 100644 devops/scripts/benchmarks/html/index.html
 delete mode 100644 devops/scripts/benchmarks/html/scripts.js
 delete mode 100644 devops/scripts/benchmarks/html/styles.css

diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
deleted file mode 100644
index 3e67ae1dce8e5..0000000000000
--- a/devops/scripts/benchmarks/html/config.js
+++ /dev/null
@@ -1,2 +0,0 @@
-//remoteDataUrl = 'https://example.com/data.json';
-//defaultCompareNames = ['baseline'];
diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
deleted file mode 100644
index a5b96c72834ba..0000000000000
--- a/devops/scripts/benchmarks/html/data.js
+++ /dev/null
@@ -1,3 +0,0 @@
-benchmarkRuns = [];
-
-defaultCompareNames = [];
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
deleted file mode 100644
index ba8e77c6aff9e..0000000000000
--- a/devops/scripts/benchmarks/html/index.html
+++ /dev/null
@@ -1,78 +0,0 @@
-<!--
-  Copyright (C) 2024-2025 Intel Corporation
-  Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-  See LICENSE.TXT
-  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
--->
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <title>Benchmark Results</title>
-    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns"></script>
-    <script src="data.js"></script>
-    <script src="config.js"></script>
-    <script src="scripts.js"></script>
-    <link rel="stylesheet" href="styles.css">
-</head>
-<body>
-    <div class="container">
-        <h1>Benchmark Results</h1>
-        <div id="loading-indicator" class="loading-indicator" style="display: none;">
-            Loading data, please wait...
-        </div>
-        <div class="filter-container">
-            <input type="text" id="bench-filter" placeholder="Regex...">
-        </div>
-        <div class="run-selector">
-            <select id="run-select">
-                <option value="">Select a run to compare...</option>
-            </select>
-            <button onclick="addSelectedRun()">Add</button>
-            <div id="selected-runs" class="selected-runs"></div>
-        </div>
-        <details class="options-container">
-            <summary>Options</summary>
-            <div class="options-content">
-                <div class="filter-section">
-                    <h3>Display Options</h3>
-                    <div class="display-options">
-                        <label>
-                            <input type="checkbox" id="show-notes" checked>
-                            Director's commentary
-                        </label>
-                        <label>
-                            <input type="checkbox" id="show-unstable">
-                            Show 'it works on my machine' scenarios
-                        </label>
-                    </div>
-                </div>
-
-                <div class="filter-section">
-                    <h3>Suites</h3>
-                    <div id="suite-filters">
-                        <!-- Suite checkboxes will be generated by JavaScript -->
-                    </div>
-                </div>
-
-                <div class="filter-section">
-                    <h3>Tags <button class="tag-action-button" onclick="toggleAllTags(false)">Clear All</button></h3>
-                    <div id="tag-filters">
-                        <!-- Tag checkboxes will be generated by JavaScript -->
-                    </div>
-                </div>
-            </div>
-        </details>
-        <details class="timeseries">
-            <summary>Historical Results</summary>
-            <div class="charts"></div>
-        </details>
-        <details class="bar-charts">
-            <summary>Comparisons</summary>
-            <div class="charts"></div>
-        </details>
-    </div>
-</body>
-</html>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
deleted file mode 100644
index e09b420e95f21..0000000000000
--- a/devops/scripts/benchmarks/html/scripts.js
+++ /dev/null
@@ -1,812 +0,0 @@
-// Copyright (C) 2024-2025 Intel Corporation
-// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-// See LICENSE.TXT
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// Core state
-let activeRuns = new Set(defaultCompareNames);
-let chartInstances = new Map();
-let suiteNames = new Set();
-let timeseriesData, barChartsData, allRunNames;
-let activeTags = new Set();
-
-// DOM Elements
-let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer;
-
-// Run selector functions
-function updateSelectedRuns(forceUpdate = true) {
-    selectedRunsDiv.innerHTML = '';
-    activeRuns.forEach(name => {
-        selectedRunsDiv.appendChild(createRunElement(name));
-    });
-    if (forceUpdate)
-        updateCharts();
-}
-
-function createRunElement(name) {
-    const runElement = document.createElement('span');
-    runElement.className = 'selected-run';
-    runElement.innerHTML = `${name} <button onclick="removeRun('${name}')">X</button>`;
-    return runElement;
-}
-
-function addSelectedRun() {
-    const selectedRun = runSelect.value;
-    if (selectedRun && !activeRuns.has(selectedRun)) {
-        activeRuns.add(selectedRun);
-        updateSelectedRuns();
-    }
-}
-
-function removeRun(name) {
-    activeRuns.delete(name);
-    updateSelectedRuns();
-}
-
-// Chart creation and update
-function createChart(data, containerId, type) {
-    if (chartInstances.has(containerId)) {
-        chartInstances.get(containerId).destroy();
-    }
-
-    const ctx = document.getElementById(containerId).getContext('2d');
-    const options = {
-        responsive: true,
-        plugins: {
-            title: {
-                display: true,
-                text: data.label
-            },
-            subtitle: {
-                display: true,
-                text: data.lower_is_better ? "Lower is better" : "Higher is better"
-            },
-            tooltip: {
-                callbacks: {
-                    label: (context) => {
-                        if (type === 'time') {
-                            const point = context.raw;
-                            return [
-                                `${data.label}:`,
-                                `Value: ${point.y.toFixed(2)} ${data.unit}`,
-                                `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`,
-                                `Git Hash: ${point.gitHash}`,
-                            ];
-                        } else {
-                            return [`${context.dataset.label}:`,
-                                `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`,
-                            ];
-                        }
-                    }
-                }
-            }
-        },
-        scales: {
-            y: {
-                title: {
-                    display: true,
-                    text: data.unit
-                },
-                grace: '20%',
-            }
-        }
-    };
-
-    if (type === 'time') {
-        options.interaction = {
-            mode: 'nearest',
-            intersect: false
-        };
-        options.onClick = (event, elements) => {
-            if (elements.length > 0) {
-                const point = elements[0].element.$context.raw;
-                if (point.gitHash && point.gitRepo) {
-                    window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank');
-                }
-            }
-        };
-        options.scales.x = {
-            type: 'timeseries',
-            time: {
-                unit: 'day'
-            },
-            ticks: {
-                maxRotation: 45,
-                minRotation: 45,
-                autoSkip: true,
-                maxTicksLimit: 10
-            }
-        };
-    }
-
-    const chartConfig = {
-        type: type === 'time' ? 'line' : 'bar',
-        data: type === 'time' ? {
-            datasets: createTimeseriesDatasets(data)
-        } : {
-            labels: data.labels,
-            datasets: data.datasets
-        },
-        options: options
-    };
-
-    const chart = new Chart(ctx, chartConfig);
-    chartInstances.set(containerId, chart);
-    return chart;
-}
-
-function createTimeseriesDatasets(data) {
-    return Object.entries(data.runs).map(([name, points]) => ({
-        label: name,
-        data: points.map(p => ({
-            x: new Date(p.date),
-            y: p.value,
-            gitHash: p.git_hash,
-            gitRepo: p.github_repo,
-            stddev: p.stddev
-        })),
-        borderWidth: 1,
-        pointRadius: 3,
-        pointStyle: 'circle',
-        pointHoverRadius: 5
-    }));
-}
-
-function updateCharts() {
-    // Filter data by active runs
-    const filteredTimeseriesData = timeseriesData.map(chart => ({
-        ...chart,
-        runs: Object.fromEntries(
-            Object.entries(chart.runs).filter(([name]) => activeRuns.has(name))
-        )
-    }));
-
-    const filteredBarChartsData = barChartsData.map(chart => ({
-        ...chart,
-        labels: chart.labels.filter(label => activeRuns.has(label)),
-        datasets: chart.datasets.map(dataset => ({
-            ...dataset,
-            data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i]))
-        }))
-    }));
-
-    // Draw charts with filtered data
-    drawCharts(filteredTimeseriesData, filteredBarChartsData);
-}
-
-function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
-    // Clear existing charts
-    document.querySelectorAll('.charts').forEach(container => container.innerHTML = '');
-    chartInstances.forEach(chart => chart.destroy());
-    chartInstances.clear();
-
-    // Create timeseries charts
-    filteredTimeseriesData.forEach((data, index) => {
-        const containerId = `timeseries-${index}`;
-        const container = createChartContainer(data, containerId, 'benchmark');
-        document.querySelector('.timeseries .charts').appendChild(container);
-        createChart(data, containerId, 'time');
-    });
-
-    // Create bar charts
-    filteredBarChartsData.forEach((data, index) => {
-        const containerId = `barchart-${index}`;
-        const container = createChartContainer(data, containerId, 'group');
-        document.querySelector('.bar-charts .charts').appendChild(container);
-        createChart(data, containerId, 'bar');
-    });
-
-    // Apply current filters
-    filterCharts();
-}
-
-function createChartContainer(data, canvasId, type) {
-    const container = document.createElement('div');
-    container.className = 'chart-container';
-    container.setAttribute('data-label', data.label);
-    container.setAttribute('data-suite', data.suite);
-
-    // Check if this benchmark is marked as unstable
-    const metadata = metadataForLabel(data.label, type);
-    if (metadata && metadata.unstable) {
-        container.setAttribute('data-unstable', 'true');
-
-        // Add unstable warning
-        const unstableWarning = document.createElement('div');
-        unstableWarning.className = 'benchmark-unstable';
-        unstableWarning.textContent = metadata.unstable;
-        unstableWarning.style.display = isUnstableEnabled() ? 'block' : 'none';
-        container.appendChild(unstableWarning);
-    }
-
-    // Add description if present in metadata (moved outside of details)
-    if (metadata && metadata.description) {
-        const descElement = document.createElement('div');
-        descElement.className = 'benchmark-description';
-        descElement.textContent = metadata.description;
-        container.appendChild(descElement);
-    }
-
-    // Add notes if present
-    if (metadata && metadata.notes) {
-        const noteElement = document.createElement('div');
-        noteElement.className = 'benchmark-note';
-        noteElement.textContent = metadata.notes;
-        noteElement.style.display = isNotesEnabled() ? 'block' : 'none';
-        container.appendChild(noteElement);
-    }
-
-    // Add tags if present
-    if (metadata && metadata.tags) {
-        container.setAttribute('data-tags', metadata.tags.join(','));
-        
-        // Add tags display
-        const tagsContainer = document.createElement('div');
-        tagsContainer.className = 'benchmark-tags';
-        
-        metadata.tags.forEach(tag => {
-            const tagElement = document.createElement('span');
-            tagElement.className = 'tag';
-            tagElement.textContent = tag;
-            tagElement.setAttribute('data-tag', tag);
-            
-            // Add tooltip with tag description
-            if (benchmarkTags[tag]) {
-                tagElement.setAttribute('title', benchmarkTags[tag].description);
-            }
-            
-            tagsContainer.appendChild(tagElement);
-        });
-        
-        container.appendChild(tagsContainer);
-    }
-
-    const canvas = document.createElement('canvas');
-    canvas.id = canvasId;
-    container.appendChild(canvas);
-
-    // Create details section for extra info
-    const details = document.createElement('details');
-    const summary = document.createElement('summary');
-    summary.textContent = "Details";
-
-    // Add subtle download button to the summary
-    const downloadButton = document.createElement('button');
-    downloadButton.className = 'download-button';
-    downloadButton.textContent = 'Download';
-    downloadButton.onclick = (event) => {
-        event.stopPropagation(); // Prevent details toggle
-        downloadChart(canvasId, data.label);
-    };
-    summary.appendChild(downloadButton);
-    details.appendChild(summary);
-
-    // Create and append extra info
-    const extraInfo = document.createElement('div');
-    extraInfo.className = 'extra-info';
-    latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
-    extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data, 'benchmark');
-    details.appendChild(extraInfo);
-
-    container.appendChild(details);
-
-    return container;
-}
-
-function metadataForLabel(label, type) {
-    for (const [key, metadata] of Object.entries(benchmarkMetadata)) {
-        if (metadata.type === type && label.startsWith(key)) {
-            return metadata;
-        }
-    }
-
-    return null;
-}
-
-// Pre-compute a lookup for the latest run per label
-function createLatestRunsLookup(benchmarkRuns) {
-    const latestRunsMap = new Map();
-
-    benchmarkRuns.forEach(run => {
-        // Yes, we need to convert the date every time. I checked.
-        const runDate = new Date(run.date);
-        run.results.forEach(result => {
-            const label = result.label;
-            if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) {
-                latestRunsMap.set(label, {
-                    run,
-                    result
-                });
-            }
-        });
-    });
-
-    return latestRunsMap;
-}
-
-function generateExtraInfo(latestRunsLookup, data, type) {
-    const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
-
-    return labels.map(label => {
-        const metadata = metadataForLabel(label, type);
-        const latestRun = latestRunsLookup.get(label);
-
-        let html = '<div class="extra-info-entry">';
-
-        if (metadata) {
-            html += `<strong>${label}:</strong> ${formatCommand(latestRun.result)}<br>`;
-
-            if (metadata.description) {
-                html += `<em>Description:</em> ${metadata.description}`;
-            }
-
-            if (metadata.notes) {
-                html += `<br><em>Notes:</em> <span class="note-text">${metadata.notes}</span>`;
-            }
-
-            if (metadata.unstable) {
-                html += `<br><em class="unstable-warning">⚠️ Unstable:</em> <span class="unstable-text">${metadata.unstable}</span>`;
-            }
-        } else {
-            html += `<strong>${label}:</strong> No data available`;
-        }
-
-        html += '</div>';
-        return html;
-    }).join('');
-}
-
-function formatCommand(run) {
-    const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' ');
-    let command = run.command ? [...run.command] : [];
-
-    return `${envVars} ${command.join(' ')}`.trim();
-}
-
-function downloadChart(canvasId, label) {
-    const chart = chartInstances.get(canvasId);
-    if (chart) {
-        const link = document.createElement('a');
-        link.href = chart.toBase64Image('image/png', 1)
-        link.download = `${label}.png`;
-        link.click();
-    }
-}
-
-// URL and filtering functions
-function getQueryParam(param) {
-    const urlParams = new URLSearchParams(window.location.search);
-    return urlParams.get(param);
-}
-
-function updateURL() {
-    const url = new URL(window.location);
-    const regex = document.getElementById('bench-filter').value;
-    const activeSuites = getActiveSuites();
-    const activeRunsList = Array.from(activeRuns);
-    const activeTagsList = Array.from(activeTags);
-
-    if (regex) {
-        url.searchParams.set('regex', regex);
-    } else {
-        url.searchParams.delete('regex');
-    }
-
-    if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) {
-        url.searchParams.set('suites', activeSuites.join(','));
-    } else {
-        url.searchParams.delete('suites');
-    }
-
-    // Add tags to URL
-    if (activeTagsList.length > 0) {
-        url.searchParams.set('tags', activeTagsList.join(','));
-    } else {
-        url.searchParams.delete('tags');
-    }
-
-    // Handle the runs parameter
-    if (activeRunsList.length > 0) {
-        // Check if the active runs are the same as default runs
-        const defaultRuns = new Set(defaultCompareNames || []);
-        const isDefaultRuns = activeRunsList.length === defaultRuns.size &&
-            activeRunsList.every(run => defaultRuns.has(run));
-
-        if (isDefaultRuns) {
-            // If it's just the default runs, omit the parameter entirely
-            url.searchParams.delete('runs');
-        } else {
-            url.searchParams.set('runs', activeRunsList.join(','));
-        }
-    } else {
-        url.searchParams.delete('runs');
-    }
-
-    // Add toggle states to URL
-    url.searchParams.set('notes', isNotesEnabled());
-    url.searchParams.set('unstable', isUnstableEnabled());
-
-    history.replaceState(null, '', url);
-}
-
-function filterCharts() {
-    const regexInput = document.getElementById('bench-filter').value;
-    const regex = new RegExp(regexInput, 'i');
-    const activeSuites = getActiveSuites();
-
-    document.querySelectorAll('.chart-container').forEach(container => {
-        const label = container.getAttribute('data-label');
-        const suite = container.getAttribute('data-suite');
-        const isUnstable = container.getAttribute('data-unstable') === 'true';
-        const tags = container.getAttribute('data-tags') ? 
-                    container.getAttribute('data-tags').split(',') : [];
-
-        // Check if benchmark has all active tags (if any are selected)
-        const hasAllActiveTags = activeTags.size === 0 || 
-                               Array.from(activeTags).every(tag => tags.includes(tag));
-
-        // Hide unstable benchmarks if showUnstable is false
-        const shouldShow = regex.test(label) &&
-            activeSuites.includes(suite) &&
-            (isUnstableEnabled() || !isUnstable) &&
-            hasAllActiveTags;
-
-        container.style.display = shouldShow ? '' : 'none';
-    });
-
-    updateURL();
-}
-
-function getActiveSuites() {
-    return Array.from(document.querySelectorAll('.suite-checkbox:checked'))
-        .map(checkbox => checkbox.getAttribute('data-suite'));
-}
-
-// Data processing
-function processTimeseriesData(benchmarkRuns) {
-    const resultsByLabel = {};
-
-    benchmarkRuns.forEach(run => {
-        const runDate = run.date ? new Date(run.date) : null;
-        run.results.forEach(result => {
-            if (!resultsByLabel[result.label]) {
-                resultsByLabel[result.label] = {
-                    label: result.label,
-                    suite: result.suite,
-                    unit: result.unit,
-                    lower_is_better: result.lower_is_better,
-                    runs: {}
-                };
-            }
-
-            if (!resultsByLabel[result.label].runs[run.name]) {
-                resultsByLabel[result.label].runs[run.name] = [];
-            }
-
-            resultsByLabel[result.label].runs[run.name].push({
-                date: runDate,
-                value: result.value,
-                stddev: result.stddev,
-                git_hash: run.git_hash,
-                github_repo: run.github_repo
-            });
-        });
-    });
-
-    return Object.values(resultsByLabel);
-}
-
-function processBarChartsData(benchmarkRuns) {
-    const groupedResults = {};
-
-    benchmarkRuns.reverse().forEach(run => {
-        run.results.forEach(result => {
-            if (!result.explicit_group) return;
-
-            if (!groupedResults[result.explicit_group]) {
-                // Look up group metadata
-                const groupMetadata = metadataForLabel(result.explicit_group);
-
-                groupedResults[result.explicit_group] = {
-                    label: result.explicit_group,
-                    suite: result.suite,
-                    unit: result.unit,
-                    lower_is_better: result.lower_is_better,
-                    labels: [],
-                    datasets: [],
-                    // Add metadata if available
-                    description: groupMetadata?.description || null,
-                    notes: groupMetadata?.notes || null,
-                    unstable: groupMetadata?.unstable || null
-                };
-            }
-
-            const group = groupedResults[result.explicit_group];
-
-            if (!group.labels.includes(run.name)) {
-                group.labels.push(run.name);
-            }
-
-            let dataset = group.datasets.find(d => d.label === result.label);
-            if (!dataset) {
-                dataset = {
-                    label: result.label,
-                    data: new Array(group.labels.length).fill(null)
-                };
-                group.datasets.push(dataset);
-            }
-
-            const runIndex = group.labels.indexOf(run.name);
-            dataset.data[runIndex] = result.value;
-        });
-    });
-
-    return Object.values(groupedResults);
-}
-
-// Setup functions
-function setupRunSelector() {
-    runSelect = document.getElementById('run-select');
-    selectedRunsDiv = document.getElementById('selected-runs');
-
-    allRunNames.forEach(name => {
-        const option = document.createElement('option');
-        option.value = name;
-        option.textContent = name;
-        runSelect.appendChild(option);
-    });
-
-    updateSelectedRuns(false);
-}
-
-function setupSuiteFilters() {
-    suiteFiltersContainer = document.getElementById('suite-filters');
-
-    benchmarkRuns.forEach(run => {
-        run.results.forEach(result => {
-            suiteNames.add(result.suite);
-        });
-    });
-
-    suiteNames.forEach(suite => {
-        const label = document.createElement('label');
-        const checkbox = document.createElement('input');
-        checkbox.type = 'checkbox';
-        checkbox.className = 'suite-checkbox';
-        checkbox.dataset.suite = suite;
-        checkbox.checked = true;
-        label.appendChild(checkbox);
-        label.appendChild(document.createTextNode(' ' + suite));
-        suiteFiltersContainer.appendChild(label);
-        suiteFiltersContainer.appendChild(document.createTextNode(' '));
-    });
-}
-
-function isNotesEnabled() {
-    const notesToggle = document.getElementById('show-notes');
-    return notesToggle.checked;
-}
-
-function isUnstableEnabled() {
-    const unstableToggle = document.getElementById('show-unstable');
-    return unstableToggle.checked;
-}
-
-function setupToggles() {
-    const notesToggle = document.getElementById('show-notes');
-    const unstableToggle = document.getElementById('show-unstable');
-
-    notesToggle.addEventListener('change', function() {
-        // Update all note elements visibility
-        document.querySelectorAll('.benchmark-note').forEach(note => {
-            note.style.display = isNotesEnabled() ? 'block' : 'none';
-        });
-    });
-
-    unstableToggle.addEventListener('change', function() {
-        // Update all unstable warning elements visibility
-        document.querySelectorAll('.benchmark-unstable').forEach(warning => {
-            warning.style.display = isUnstableEnabled() ? 'block' : 'none';
-        });
-        filterCharts();
-    });
-
-    // Initialize from URL params if present
-    const notesParam = getQueryParam('notes');
-    const unstableParam = getQueryParam('unstable');
-
-    if (notesParam !== null) {
-        let showNotes = notesParam === 'true';
-        notesToggle.checked = showNotes;
-    }
-
-    if (unstableParam !== null) {
-        let showUnstable = unstableParam === 'true';
-        unstableToggle.checked = showUnstable;
-    }
-}
-
-function setupTagFilters() {
-    tagFiltersContainer = document.getElementById('tag-filters');
-
-    const allTags = [];
-    
-    if (benchmarkTags) {
-        for (const tag in benchmarkTags) {
-            if (!allTags.includes(tag)) {
-                allTags.push(tag);
-            }
-        }
-    }
-
-    // Create tag filter elements
-    allTags.forEach(tag => {
-        const tagContainer = document.createElement('div');
-        tagContainer.className = 'tag-filter';
-        
-        const checkbox = document.createElement('input');
-        checkbox.type = 'checkbox';
-        checkbox.id = `tag-${tag}`;
-        checkbox.className = 'tag-checkbox';
-        checkbox.dataset.tag = tag;
-        
-        const label = document.createElement('label');
-        label.htmlFor = `tag-${tag}`;
-        label.textContent = tag;
-        
-        // Add info icon with tooltip if tag description exists
-        if (benchmarkTags[tag]) {
-            const infoIcon = document.createElement('span');
-            infoIcon.className = 'tag-info';
-            infoIcon.textContent = 'ⓘ';
-            infoIcon.title = benchmarkTags[tag].description;
-            label.appendChild(infoIcon);
-        }
-        
-        checkbox.addEventListener('change', function() {
-            if (this.checked) {
-                activeTags.add(tag);
-            } else {
-                activeTags.delete(tag);
-            }
-            filterCharts();
-        });
-        
-        tagContainer.appendChild(checkbox);
-        tagContainer.appendChild(label);
-        tagFiltersContainer.appendChild(tagContainer);
-    });
-}
-
-function toggleAllTags(select) {
-    const checkboxes = document.querySelectorAll('.tag-checkbox');
-    
-    checkboxes.forEach(checkbox => {
-        checkbox.checked = select;
-        const tag = checkbox.dataset.tag;
-        
-        if (select) {
-            activeTags.add(tag);
-        } else {
-            activeTags.delete(tag);
-        }
-    });
-    
-    filterCharts();
-}
-
-function initializeCharts() {
-    // Process raw data
-    timeseriesData = processTimeseriesData(benchmarkRuns);
-    barChartsData = processBarChartsData(benchmarkRuns);
-    allRunNames = [...new Set(benchmarkRuns.map(run => run.name))];
-
-    // Set up active runs
-    const runsParam = getQueryParam('runs');
-    if (runsParam) {
-        const runsFromUrl = runsParam.split(',');
-
-        // Start with an empty set
-        activeRuns = new Set();
-
-        // Process each run from URL
-        runsFromUrl.forEach(run => {
-            if (run === 'default') {
-                // Special case: include all default runs
-                (defaultCompareNames || []).forEach(defaultRun => {
-                    if (allRunNames.includes(defaultRun)) {
-                        activeRuns.add(defaultRun);
-                    }
-                });
-            } else if (allRunNames.includes(run)) {
-                // Add the specific run if it exists
-                activeRuns.add(run);
-            }
-        });
-    } else {
-        // No runs parameter, use defaults
-        activeRuns = new Set(defaultCompareNames || []);
-    }
-
-    // Setup UI components
-    setupRunSelector();
-    setupSuiteFilters();
-    setupTagFilters();
-    setupToggles();
-
-    // Apply URL parameters
-    const regexParam = getQueryParam('regex');
-    const suitesParam = getQueryParam('suites');
-    const tagsParam = getQueryParam('tags');
-
-    if (regexParam) {
-        document.getElementById('bench-filter').value = regexParam;
-    }
-
-    if (suitesParam) {
-        const suites = suitesParam.split(',');
-        document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
-            checkbox.checked = suites.includes(checkbox.getAttribute('data-suite'));
-        });
-    }
-
-    // Apply tag filters from URL
-    if (tagsParam) {
-        const tags = tagsParam.split(',');
-        tags.forEach(tag => {
-            const checkbox = document.querySelector(`.tag-checkbox[data-tag="${tag}"]`);
-            if (checkbox) {
-                checkbox.checked = true;
-                activeTags.add(tag);
-            }
-        });
-    }
-
-    // Setup event listeners
-    document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
-        checkbox.addEventListener('change', filterCharts);
-    });
-    document.getElementById('bench-filter').addEventListener('input', filterCharts);
-
-    // Draw initial charts
-    updateCharts();
-}
-
-// Make functions available globally for onclick handlers
-window.addSelectedRun = addSelectedRun;
-window.removeRun = removeRun;
-window.toggleAllTags = toggleAllTags;
-
-// Load data based on configuration
-function loadData() {
-    const loadingIndicator = document.getElementById('loading-indicator');
-    loadingIndicator.style.display = 'block'; // Show loading indicator
-
-    if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
-        // Fetch data from remote URL
-        fetch(remoteDataUrl)
-            .then(response => response.json())
-            .then(data => {
-                benchmarkRuns = data.runs || data;
-                benchmarkMetadata = data.metadata || benchmarkMetadata || {};
-                benchmarkTags = data.tags || benchmarkTags || {};
-                initializeCharts();
-            })
-            .catch(error => {
-                console.error('Error fetching remote data:', error);
-                loadingIndicator.textContent = 'Fetching remote data failed.';
-            })
-            .finally(() => {
-                loadingIndicator.style.display = 'none'; // Hide loading indicator
-            });
-    } else {
-        // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js)
-        initializeCharts();
-        loadingIndicator.style.display = 'none'; // Hide loading indicator
-    }
-}
-
-// Initialize when DOM is ready
-document.addEventListener('DOMContentLoaded', () => {
-    loadData();
-});
diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css
deleted file mode 100644
index 3e9c3bd22fc37..0000000000000
--- a/devops/scripts/benchmarks/html/styles.css
+++ /dev/null
@@ -1,357 +0,0 @@
-body {
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-    margin: 0;
-    padding: 16px;
-    background: #f8f9fa;
-}
-.container {
-    max-width: 1100px;
-    margin: 0 auto;
-}
-h1, h2 {
-    color: #212529;
-    text-align: center;
-    margin-bottom: 24px;
-    font-weight: 500;
-}
-.chart-container {
-    background: white;
-    border-radius: 8px;
-    padding: 24px;
-    margin-bottom: 24px;
-    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-}
-@media (max-width: 768px) {
-    body {
-        padding: 12px;
-    }
-    .chart-container {
-        padding: 16px;
-        border-radius: 6px;
-    }
-    h1 {
-        font-size: 24px;
-        margin-bottom: 16px;
-    }
-}
-.filter-container {
-    text-align: center;
-    margin-bottom: 24px;
-}
-.filter-container input {
-    padding: 8px;
-    font-size: 16px;
-    border: 1px solid #ccc;
-    border-radius: 4px;
-    width: 400px;
-    max-width: 100%;
-}
-.suite-filter-container {
-    text-align: center;
-    margin-bottom: 24px;
-    padding: 16px;
-    background: #e9ecef;
-    border-radius: 8px;
-}
-.suite-checkbox {
-    margin: 0 8px;
-}
-details {
-    margin-bottom: 24px;
-}
-summary {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    font-size: 16px;
-    font-weight: 500;
-    cursor: pointer;
-    padding: 12px 16px;
-    background: #dee2e6;
-    border-radius: 8px;
-    user-select: none;
-}
-summary:hover {
-    background: #ced4da;
-}
-summary::marker {
-    display: none;
-}
-summary::-webkit-details-marker {
-    display: none;
-}
-summary::after {
-    content: "▼";
-    font-size: 12px;
-    margin-left: 8px;
-    transition: transform 0.3s;
-}
-details[open] summary::after {
-    transform: rotate(180deg);
-}
-.extra-info {
-    padding: 8px;
-    background: #f8f9fa;
-    border-radius: 8px;
-    margin-top: 8px;
-}
-.run-selector {
-    text-align: center;
-    margin-bottom: 24px;
-    padding: 16px;
-    background: #e9ecef;
-    border-radius: 8px;
-}
-.run-selector select {
-    width: 300px;
-    padding: 8px;
-    margin-right: 8px;
-}
-.run-selector button {
-    padding: 8px 16px;
-    background: #0068B5;
-    color: white;
-    border: none;
-    border-radius: 4px;
-    cursor: pointer;
-}
-.run-selector button:hover {
-    background: #00C7FD;
-}
-.selected-runs {
-    margin-top: 12px;
-}
-.selected-run {
-    display: inline-block;
-    padding: 4px 8px;
-    margin: 4px;
-    background: #e2e6ea;
-    border-radius: 4px;
-}
-.selected-run button {
-    margin-left: 8px;
-    padding: 0 4px;
-    background: none;
-    border: none;
-    color: #dc3545;
-    cursor: pointer;
-}
-.download-button {
-    background: none;
-    border: none;
-    color: #0068B5;
-    cursor: pointer;
-    font-size: 16px;
-    padding: 4px;
-    margin-left: 8px;
-}
-.download-button:hover {
-    color: #00C7FD;
-}
-.loading-indicator {
-    text-align: center;
-    font-size: 18px;
-    color: #0068B5;
-    margin-bottom: 20px;
-}
-.extra-info-entry {
-    border: 1px solid #ddd;
-    padding: 10px;
-    margin-bottom: 10px;
-    background-color: #f9f9f9;
-    border-radius: 5px;
-}
-.extra-info-entry strong {
-    display: block;
-    margin-bottom: 5px;
-}
-.extra-info-entry em {
-    color: #555;
-}
-.display-options-container {
-    text-align: center;
-    margin-bottom: 24px;
-    padding: 16px;
-    background: #e9ecef;
-    border-radius: 8px;
-}
-.display-options-container label {
-    margin: 0 12px;
-    cursor: pointer;
-}
-.display-options-container input {
-    margin-right: 8px;
-}
-.benchmark-note {
-    background-color: #cfe2ff;
-    color: #084298;
-    padding: 10px;
-    margin-bottom: 10px;
-    border-radius: 5px;
-    border-left: 4px solid #084298;
-    white-space: pre-line;
-}
-.benchmark-unstable {
-    background-color: #f8d7da;
-    color: #842029;
-    padding: 10px;
-    margin-bottom: 10px;
-    border-radius: 5px;
-    border-left: 4px solid #842029;
-    white-space: pre-line;
-}
-.note-text {
-    color: #084298;
-}
-.unstable-warning {
-    color: #842029;
-    font-weight: bold;
-}
-.unstable-text {
-    color: #842029;
-}
-.options-container {
-    margin-bottom: 24px;
-    background: #e9ecef;
-    border-radius: 8px;
-    overflow: hidden;
-}
-.options-container summary {
-    padding: 12px 16px;
-    font-weight: 500;
-    cursor: pointer;
-    background: #dee2e6;
-    user-select: none;
-}
-.options-container summary:hover {
-    background: #ced4da;
-}
-.options-content {
-    padding: 16px;
-    display: flex;
-    flex-wrap: wrap;
-    gap: 24px;
-}
-.filter-section {
-    flex: 1;
-    min-width: 300px;
-}
-.filter-section h3 {
-    margin-top: 0;
-    margin-bottom: 12px;
-    font-size: 18px;
-    font-weight: 500;
-    text-align: left;
-    display: flex;
-    align-items: center;
-}
-#suite-filters {
-    display: flex;
-    flex-wrap: wrap;
-    max-height: 200px;
-    overflow-y: auto;
-    border: 1px solid #dee2e6;
-    border-radius: 4px;
-    padding: 8px;
-    background-color: #f8f9fa;
-}
-.display-options {
-    display: flex;
-    flex-direction: column;
-    gap: 8px;
-}
-.display-options label {
-    display: flex;
-    align-items: center;
-    cursor: pointer;
-}
-.display-options input {
-    margin-right: 8px;
-}
-.benchmark-description {
-    background-color: #f2f2f2;
-    color: #333;
-    padding: 10px;
-    margin-bottom: 10px;
-    border-radius: 5px;
-    border-left: 4px solid #6c757d;
-    white-space: pre-line;
-    font-style: italic;
-}
-/* Tag styles */
-.benchmark-tags {
-    display: flex;
-    flex-wrap: wrap;
-    gap: 4px;
-    margin-bottom: 10px;
-}
-
-.tag {
-    display: inline-block;
-    background-color: #e2e6ea;
-    color: #495057;
-    padding: 2px 8px;
-    border-radius: 12px;
-    font-size: 12px;
-    cursor: help;
-}
-
-.tag-filter {
-    display: inline-flex;
-    align-items: center;
-    margin: 4px;
-}
-
-.tag-filter label {
-    margin-left: 4px;
-    cursor: pointer;
-    display: flex;
-    align-items: center;
-}
-
-.tag-info {
-    color: #0068B5;
-    margin-left: 4px;
-    cursor: help;
-    font-size: 12px;
-}
-
-#tag-filters {
-    display: flex;
-    flex-wrap: wrap;
-    max-height: 200px;
-    overflow-y: auto;
-    border: 1px solid #dee2e6;
-    border-radius: 4px;
-    padding: 8px;
-    background-color: #f8f9fa;
-}
-
-.tag-action-button {
-    padding: 2px 8px;
-    background: #e2e6ea;
-    border: none;
-    border-radius: 4px;
-    cursor: pointer;
-    font-size: 12px;
-    margin-left: 8px;
-    vertical-align: middle;
-}
-
-.tag-action-button:hover {
-    background: #ced4da;
-}
-
-.remove-tag {
-    background: none;
-    border: none;
-    color: white;
-    margin-left: 4px;
-    cursor: pointer;
-    font-size: 16px;
-    padding: 0 4px;
-}
-
-.remove-tag:hover {
-    color: #f8d7da;
-}
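
The per-label grouping that the reverted scripts.js performed in
processTimeseriesData amounts to indexing each result by label and then by run
name. A rough Python sketch under an assumed input shape (the field names below
mirror the JSON the page consumed, but the sample data is hypothetical):

    from collections import defaultdict

    runs = [
        {"name": "baseline", "date": "2025-03-20", "git_hash": "abc1234",
         "results": [{"label": "api_overhead", "value": 1.9, "stddev": 0.1}]},
        {"name": "This PR", "date": "2025-03-21", "git_hash": "def5678",
         "results": [{"label": "api_overhead", "value": 1.7, "stddev": 0.1}]},
    ]

    # label -> run name -> list of points, the same nesting the page fed to Chart.js.
    series = defaultdict(lambda: defaultdict(list))
    for run in runs:
        for result in run["results"]:
            series[result["label"]][run["name"]].append(
                {"date": run["date"], "value": result["value"],
                 "stddev": result["stddev"], "git_hash": run["git_hash"]}
            )

    print(dict(series["api_overhead"]))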

From b49ff8856f004c7a0fe4da9ecce1de68dad1dda2 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 18:15:40 -0700
Subject: [PATCH 70/79] Revert presets.py

---
 devops/scripts/benchmarks/presets.py | 38 ----------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 devops/scripts/benchmarks/presets.py

diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
deleted file mode 100644
index 3f191766deb8c..0000000000000
--- a/devops/scripts/benchmarks/presets.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (C) 2025 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-presets: dict[str, list[str]] = {
-    "Full": [
-        "Compute Benchmarks",
-        "llama.cpp bench",
-        "SYCL-Bench",
-        "Velocity Bench",
-        "UMF",
-    ],
-    "SYCL": [
-        "Compute Benchmarks",
-        "llama.cpp bench",
-        "SYCL-Bench",
-        "Velocity Bench",
-    ],
-    "Minimal": [
-        "Compute Benchmarks",
-    ],
-    "Normal": [
-        "Compute Benchmarks",
-        "llama.cpp bench",
-        "Velocity Bench",
-    ],
-    "Test": [
-        "Test Suite",
-    ],
-}
-
-
-def enabled_suites(preset: str) -> list[str]:
-    try:
-        return presets[preset]
-    except KeyError:
-        raise ValueError(f"Preset '{preset}' not found.")

From 9357df2cec9b298b3003dcfa65f14c1b23bc8747 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Fri, 21 Mar 2025 18:18:08 -0700
Subject: [PATCH 71/79] Revert benchmark.yml

---
 .github/workflows/benchmark.yml | 129 --------------------------------
 1 file changed, 129 deletions(-)
 delete mode 100644 .github/workflows/benchmark.yml

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
deleted file mode 100644
index 8e860bce6a384..0000000000000
--- a/.github/workflows/benchmark.yml
+++ /dev/null
@@ -1,129 +0,0 @@
-name: Run Benchmarks
-
-on:
-  schedule:
-    - cron: '0 1 * * *'  # 2 hrs earlier than sycl-nightly.yml
-  workflow_call:
-    inputs:
-      commit_hash:
-        type: string
-        required: false
-        default: ''
-      upload_results:
-        type: string # true/false: workflow_dispatch does not support booleans
-        required: true
-      runner:
-        type: string
-        required: true
-      backend:
-        type: string
-        required: true
-      reset_intel_gpu:
-        type: string  # true/false: workflow_dispatch does not support booleans
-        required: true
-        default: true
-
-  workflow_dispatch:
-    inputs:
-      commit_hash:
-        description: Commit hash to build intel/llvm from
-        type: string
-        required: false
-        default: ''
-      upload_results:
-        description: 'Save and upload results'
-        type: choice
-        options:
-          - false
-          - true
-        default: true
-      runner:
-        type: choice
-        options:
-          - '["PVC_PERF"]'
-      backend:
-        description: Backend to use
-        type: choice
-        options:
-          - 'level_zero:gpu'
-        # TODO L0 V2 support
-      reset_intel_gpu:
-        description: Reset Intel GPUs
-        type: choice
-        options:
-          - false
-          - true
-        default: true
-
-permissions: read-all
-
-jobs:
-  build_sycl:
-    name: Build SYCL from PR
-    if: inputs.commit_hash != ''
-    uses: ./.github/workflows/sycl-linux-build.yml
-    with:
-      build_ref: ${{ inputs.commit_hash }}
-      build_cache_root: "/__w/"
-      build_artifact_suffix: "default"
-      build_cache_suffix: "default"
-      # Docker image has last nightly pre-installed and added to the PATH
-      build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest"
-      cc: clang
-      cxx: clang++
-      changes: '[]'
-
-  run_benchmarks_build:
-    name: Run Benchmarks (on PR Build)
-    needs: [ build_sycl ]
-    if: inputs.commit_hash != ''
-    strategy:
-      matrix:
-        # Set default values if not specified:
-        include:
-          - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
-            backend: ${{ inputs.backend || 'level_zero:gpu' }}
-            reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }}
-            ref: ${{ inputs.commit_hash }}
-    uses: ./.github/workflows/sycl-linux-run-tests.yml
-    secrets: inherit
-    with:
-      # TODO support other benchmarks
-      name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }})
-      runner: ${{ matrix.runner }}
-      image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest
-      image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
-      target_devices: ${{ matrix.backend }}
-      reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
-      tests_selector: benchmark_v2
-      benchmark_upload_results: ${{ inputs.upload_results }}
-      benchmark_build_hash: ${{ inputs.commit_hash }}
-      repo_ref: ${{ matrix.ref }}
-      devops_ref: ${{ github.ref }}
-      sycl_toolchain_artifact: sycl_linux_default
-      sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }}
-      sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }}
-
-  run_benchmarks_nightly:
-    name: Run Benchmarks (on Nightly Build)
-    if: inputs.commit_hash == ''
-    strategy:
-      matrix:
-        # Set default values if not specified:
-        include:
-          - runner: ${{ inputs.runner || '["PVC_PERF"]' }}
-            backend: ${{ inputs.backend || 'level_zero:gpu' }}
-            reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }}
-    uses: ./.github/workflows/sycl-linux-run-tests.yml
-    secrets: inherit
-    with:
-      # TODO support other benchmarks
-      name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }})
-      runner: ${{ matrix.runner }}
-      image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest
-      image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
-      target_devices: ${{ matrix.backend }}
-      reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
-      tests_selector: benchmark_v2
-      benchmark_upload_results: ${{ inputs.upload_results }}
-      repo_ref: ${{ github.ref }}

From 03bfd1534f9d0244337efb66c07c3006a7509491 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 24 Mar 2025 11:24:07 -0700
Subject: [PATCH 72/79] Update imports to reflect result.py move

---
 devops/scripts/benchmarks/history.py         | 2 +-
 devops/scripts/benchmarks/output_html.py     | 2 +-
 devops/scripts/benchmarks/output_markdown.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 7902aa4f04c35..2fc863deb40d9 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -6,7 +6,7 @@
 import os
 import json
 from pathlib import Path
-from benches.result import Result, BenchmarkRun
+from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 4ba395bc3aac6..e9c1f135b70cd 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -11,7 +11,7 @@
 from collections import defaultdict
 from dataclasses import dataclass
 import matplotlib.dates as mdates
-from benches.result import BenchmarkRun, Result
+from utils.result import BenchmarkRun, Result
 import numpy as np
 from string import Template
 
diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
index dd6711cec6365..84af97fc51adb 100644
--- a/devops/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections
-from benches.result import Result
+from utils.result import Result
 from options import options, MarkdownSize
 import ast
 

From 0ff0142edc2fb705536414957dec35e792f56662 Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Mon, 24 Mar 2025 14:55:31 -0700
Subject: [PATCH 73/79] Add benchmark history updates

---
 devops/scripts/benchmarks/history.py | 55 +++++++++++++++++-----------
 devops/scripts/benchmarks/main.py    | 13 ++++++-
 devops/scripts/benchmarks/options.py |  1 +
 3 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 2fc863deb40d9..0b80c54ad7393 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -6,6 +6,7 @@
 import os
 import json
 from pathlib import Path
+import socket
 from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
@@ -13,7 +14,6 @@
 
 
 class BenchmarkHistory:
-    benchmark_run_index_max = 0
     runs = []
 
     def __init__(self, dir):
@@ -35,42 +35,55 @@ def load(self, n: int):
         # Get all JSON files in the results directory
         benchmark_files = list(results_dir.glob("*.json"))
 
-        # Extract index numbers and sort files by index number
-        def extract_index(file_path: Path) -> int:
+        # Extract timestamp and sort files by it
+        def extract_timestamp(file_path: Path) -> str:
             try:
-                return int(file_path.stem.split("_")[0])
-            except (IndexError, ValueError):
-                return -1
+                return file_path.stem.split("_")[-1]
+            except IndexError:
+                return ""
 
-        benchmark_files = [
-            file for file in benchmark_files if extract_index(file) != -1
-        ]
-        benchmark_files.sort(key=extract_index)
+        benchmark_files.sort(key=extract_timestamp, reverse=True)
 
         # Load the first n benchmark files
         benchmark_runs = []
-        for file_path in benchmark_files[n::-1]:
+        for file_path in benchmark_files[:n]:
             benchmark_run = self.load_result(file_path)
             if benchmark_run:
                 benchmark_runs.append(benchmark_run)
 
-        if benchmark_files:
-            self.benchmark_run_index_max = extract_index(benchmark_files[-1])
-
         self.runs = benchmark_runs
 
     def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
-            result = run("git rev-parse --short HEAD")
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            result = run("git rev-parse --short HEAD", cwd=script_dir)
             git_hash = result.stdout.decode().strip()
+
+            # Get the GitHub repo URL from git remote
+            remote_result = run("git remote get-url origin", cwd=script_dir)
+            remote_url = remote_result.stdout.decode().strip()
+
+            # Convert SSH or HTTPS URL to owner/repo format
+            if remote_url.startswith("git@github.com:"):
+                # SSH format: git@github.com:owner/repo.git
+                github_repo = remote_url.split("git@github.com:")[1].rstrip(".git")
+            elif remote_url.startswith("https://github.com/"):
+                # HTTPS format: https://github.com/owner/repo.git
+                github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
+            else:
+                github_repo = None
+
         except:
             git_hash = "unknown"
+            github_repo = None
 
         return BenchmarkRun(
             name=name,
             git_hash=git_hash,
+            github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
+            hostname=socket.gethostname(),
         )
 
     def save(self, save_name, results: list[Result], to_file=True):
@@ -84,12 +97,9 @@ def save(self, save_name, results: list[Result], to_file=True):
         results_dir = Path(os.path.join(self.dir, "results"))
         os.makedirs(results_dir, exist_ok=True)
 
-        self.benchmark_run_index_max += 1
-        file_path = Path(
-            os.path.join(
-                results_dir, f"{self.benchmark_run_index_max}_{save_name}.json"
-            )
-        )
+        # Use formatted timestamp for the filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
         with file_path.open("w") as file:
             json.dump(serialized, file, indent=4)
         print(f"Benchmark results saved to {file_path}")
@@ -120,6 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]):
             name=first_run.name,
             git_hash="average",
             date=first_run.date,  # should this be different?
+            hostname=first_run.hostname,
         )
 
         return average_benchmark_run
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 620c72b878137..94e9d2e6eaf92 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -253,7 +253,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    history = BenchmarkHistory(directory)
+    results_dir = directory
+    if options.custom_results_dir:
+        results_dir = Path(options.custom_results_dir)
+    history = BenchmarkHistory(results_dir)
     # limit how many files we load.
     # should this be configurable?
     history.load(1000)
@@ -445,7 +448,12 @@ def validate_and_parse_env_args(env_args):
         help="The name of the results which should be used as a baseline for metrics calculation",
         default=options.current_run_name,
     )
-
+    parser.add_argument(
+        "--results-dir",
+        type=str,
+        help="Specify a custom results directory",
+        default=options.custom_results_dir,
+    )
     parser.add_argument(
         "--build-jobs",
         type=int,
@@ -476,6 +484,7 @@ def validate_and_parse_env_args(env_args):
     options.iterations_stddev = args.iterations_stddev
     options.build_igc = args.build_igc
     options.current_run_name = args.relative_perf
+    options.custom_results_dir = args.results_dir
     options.build_jobs = args.build_jobs
 
     if args.build_igc and args.compute_runtime is None:
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 7bbca93a6f4fc..78eda7ae3c88e 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -42,6 +42,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.12"
     build_igc: bool = False
     current_run_name: str = "This PR"
+    custom_results_dir = None
     build_jobs: int = multiprocessing.cpu_count()
 
 options = Options()
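
The owner/repo extraction added to create_run in history.py above can be tried on
its own. This sketch uses str.removesuffix (Python 3.9+) instead of rstrip purely
to keep the example self-contained; the helper name is hypothetical:

    def github_repo_from_remote(remote_url: str):
        # SSH form: git@github.com:owner/repo.git
        if remote_url.startswith("git@github.com:"):
            repo = remote_url.split("git@github.com:", 1)[1]
        # HTTPS form: https://github.com/owner/repo.git
        elif remote_url.startswith("https://github.com/"):
            repo = remote_url.split("https://github.com/", 1)[1]
        else:
            return None
        return repo.removesuffix(".git")

    assert github_repo_from_remote("git@github.com:intel/llvm.git") == "intel/llvm"
    assert github_repo_from_remote("https://github.com/intel/llvm.git") == "intel/llvm"
    assert github_repo_from_remote("https://example.com/foo.git") is None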

From 1ef9251a955245fb7f993ce737b1e59a66ad4bee Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 25 Mar 2025 07:45:35 -0700
Subject: [PATCH 74/79] Correct bad conflict resolution over cudnn/cublas flags

---
 devops/scripts/benchmarks/main.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 94e9d2e6eaf92..7f8846ad4292c 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -447,13 +447,25 @@ def validate_and_parse_env_args(env_args):
         type=str,
         help="The name of the results which should be used as a baseline for metrics calculation",
         default=options.current_run_name,
+    ) 
+    parser.add_argument(
+        "--cudnn_directory",
+        type=str,
+        help="Directory for cudnn library",
+        default=None,
+    )
+    parser.add_argument(
+        "--cublas_directory",
+        type=str,
+        help="Directory for cublas library",
+        default=None,
     )
     parser.add_argument(
         "--results-dir",
         type=str,
         help="Specify a custom results directory",
         default=options.custom_results_dir,
-    )
+    ) 
     parser.add_argument(
         "--build-jobs",
         type=int,
@@ -484,6 +496,8 @@ def validate_and_parse_env_args(env_args):
     options.iterations_stddev = args.iterations_stddev
     options.build_igc = args.build_igc
     options.current_run_name = args.relative_perf
+    options.cudnn_directory = args.cudnn_directory
+    options.cublas_directory = args.cublas_directory
     options.custom_results_dir = args.results_dir
     options.build_jobs = args.build_jobs
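The two new flags only capture paths here; how the directories are consumed is outside this hunk. Purely as an illustration (the helper below is hypothetical and not part of main.py), a consumer could splice them into the loader path of the benchmark process:

# Hypothetical helper: expose the cudnn/cublas directories via LD_LIBRARY_PATH.
import os

def with_cuda_libs(env, cudnn_directory=None, cublas_directory=None):
    env = dict(env)
    extra = [d for d in (cudnn_directory, cublas_directory) if d]
    if extra:
        current = env.get("LD_LIBRARY_PATH", "")
        env["LD_LIBRARY_PATH"] = os.pathsep.join(extra + ([current] if current else []))
    return env

# e.g. subprocess.run(cmd, env=with_cuda_libs(os.environ, args.cudnn_directory, args.cublas_directory))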
 

From 31c669550069b2e5db7db7f1b28aa4e850248a6f Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 25 Mar 2025 07:51:49 -0700
Subject: [PATCH 75/79] Remove use of typing to stay consistent across files

---
 devops/scripts/benchmarks/utils/result.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index b9ebfdcb60952..04eb5cf0b25c6 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -4,7 +4,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from dataclasses import dataclass, field
-from typing import Optional, Dict, List, Any
 from dataclasses_json import config, dataclass_json
 from datetime import datetime
 
@@ -55,9 +54,9 @@ class BenchmarkTag:
 @dataclass
 class BenchmarkMetadata:
     type: str = "benchmark"  # or 'group'
-    description: Optional[str] = None
-    notes: Optional[str] = None
-    unstable: Optional[str] = None
+    description: str = None
+    notes: str = None
+    unstable: str = None
     tags: list[str] = field(default_factory=list)  # Changed to list of tag names
 
 
@@ -65,6 +64,6 @@ class BenchmarkMetadata:
 @dataclass
 class BenchmarkOutput:
     runs: list[BenchmarkRun]
-    metadata: Dict[str, BenchmarkMetadata]
-    tags: Dict[str, BenchmarkTag]
-    default_compare_names: List[str] = field(default_factory=list)
+    metadata: dict[str, BenchmarkMetadata]
+    tags: dict[str, BenchmarkTag]
+    default_compare_names: list[str] = field(default_factory=list)
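The replacement annotations are the built-in generics from PEP 585 (Python 3.9+), so the typing import is no longer needed, and Optional[str] becomes a plain str field with a None default, matching the other benchmark files. A small sketch of the resulting style (names are illustrative only):

# Illustrative only: the annotation style the benchmark dataclasses share after this patch.
from dataclasses import dataclass, field

@dataclass
class ExampleRecord:
    label: str = None                                   # plain default instead of Optional[str]
    values: list[float] = field(default_factory=list)   # built-in generic (PEP 585)
    extra: dict[str, str] = field(default_factory=dict)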

From 136f64e97dff620d9b49a862835438f8e68c9667 Mon Sep 17 00:00:00 2001
From: Ian Li <ian.li@intel.com>
Date: Tue, 25 Mar 2025 10:56:23 -0400
Subject: [PATCH 76/79] Remove debug comments

Co-authored-by: Piotr Balcer <piotr.balcer@intel.com>
---
 devops/scripts/benchmarks/utils/result.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index 04eb5cf0b25c6..f1a54c48ff778 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -57,7 +57,7 @@ class BenchmarkMetadata:
     description: str = None
     notes: str = None
     unstable: str = None
-    tags: list[str] = field(default_factory=list)  # Changed to list of tag names
+    tags: list[str] = field(default_factory=list)
 
 
 @dataclass_json

From ccb2a9c894cf7b4c16c2d064586a78368f828d4f Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 25 Mar 2025 07:59:20 -0700
Subject: [PATCH 77/79] Remove trailing spaces

---
 devops/scripts/benchmarks/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 7f8846ad4292c..2bb023d0153af 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -447,7 +447,7 @@ def validate_and_parse_env_args(env_args):
         type=str,
         help="The name of the results which should be used as a baseline for metrics calculation",
         default=options.current_run_name,
-    ) 
+    )
     parser.add_argument(
         "--cudnn_directory",
         type=str,
@@ -465,7 +465,7 @@ def validate_and_parse_env_args(env_args):
         type=str,
         help="Specify a custom results directory",
         default=options.custom_results_dir,
-    ) 
+    )
     parser.add_argument(
         "--build-jobs",
         type=int,

From f8ccc30cb6cf7c9deee96ca67161df077f294d9b Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 25 Mar 2025 09:00:48 -0700
Subject: [PATCH 78/79] Specify that git metadata is modifiable

---
 devops/scripts/benchmarks/utils/result.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
index f1a54c48ff778..14a2ffa905f34 100644
--- a/devops/scripts/benchmarks/utils/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -22,12 +22,12 @@ class Result:
     # stddev can be optionally set by the benchmark,
     # if not set, it will be calculated automatically.
     stddev: float = 0.0
+    git_url: str = ""
+    git_hash: str = ""
     # values below should not be set by the benchmark
     name: str = ""
     lower_is_better: bool = True
     suite: str = "Unknown"
-    git_url: str = ""
-    git_hash: str = ""
 
 @dataclass_json
 @dataclass
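Moving git_url and git_hash above the divider comment marks them as fields a benchmark may populate itself, while name, lower_is_better and suite remain framework-owned. A compact illustration of the convention (ExampleResult and its values are invented; only the field names come from the hunk):

# Illustrative only: benchmark-settable fields sit above the divider comment.
from dataclasses import dataclass

@dataclass
class ExampleResult:
    stddev: float = 0.0
    git_url: str = ""     # benchmark may record where its workload came from
    git_hash: str = ""
    # values below should not be set by the benchmark
    name: str = ""
    lower_is_better: bool = True
    suite: str = "Unknown"

r = ExampleResult(git_url="https://github.com/example/workload.git", git_hash="0123abcd")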

From c54cd764171093bd189da81c98048e569a8fefdb Mon Sep 17 00:00:00 2001
From: "Li, Ian" <ian.li@intel.com>
Date: Tue, 25 Mar 2025 09:28:07 -0700
Subject: [PATCH 79/79] Remove unused metadata variable for now

---
 devops/scripts/benchmarks/main.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 2bb023d0153af..859aa96e50903 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -167,9 +167,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         TestSuite(),
     ]
 
-    # Collect metadata from all benchmarks without setting them up
-    metadata = collect_metadata(suites)
-
     # If dry run, we're done
     if options.dry_run:
         suites = []