diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
index d1bb5fb53b83a..4c2973d250e3d 100644
--- a/devops/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -1,16 +1,37 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+from dataclasses import dataclass
 import os
 import shutil
 from pathlib import Path
-from .result import Result
+from utils.result import BenchmarkMetadata, BenchmarkTag, Result
 from options import options
 from utils.utils import download, run
-import urllib.request
-import tarfile
+
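+# Registry of tags that benchmarks can attach via get_tags();
+# benchmark_tags_dict below provides lookup of a tag's description by name.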
+benchmark_tags = [
+    BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
+    BenchmarkTag("UR", "Benchmark uses Unified Runtime API"),
+    BenchmarkTag("L0", "Benchmark uses Level Zero API directly"),
+    BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"),
+    BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"),
+    BenchmarkTag("application", "Real application-based performance test"),
+    BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"),
+    BenchmarkTag("submit", "Tests kernel submission performance"),
+    BenchmarkTag("math", "Tests math computation performance"),
+    BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"),
+    BenchmarkTag("allocation", "Tests memory allocation performance"),
+    BenchmarkTag("graph", "Tests graph-based execution performance"),
+    BenchmarkTag("latency", "Measures operation latency"),
+    BenchmarkTag("throughput", "Measures operation throughput"),
+    BenchmarkTag("inference", "Tests ML/AI inference performance"),
+    BenchmarkTag("image", "Image processing benchmark"),
+    BenchmarkTag("simulation", "Physics or scientific simulation benchmark"),
+]
+
+benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags}
 
 
 class Benchmark:
@@ -55,19 +76,25 @@ def create_data_path(self, name, skip_data_dir=False):
             data_path = os.path.join(self.directory, name)
         else:
             data_path = os.path.join(self.directory, "data", name)
-            if options.rebuild and Path(data_path).exists():
+            if options.redownload and Path(data_path).exists():
                 shutil.rmtree(data_path)
 
         Path(data_path).mkdir(parents=True, exist_ok=True)
 
         return data_path
 
-    def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False):
+    def download(
+        self,
+        name,
+        url,
+        file,
+        untar=False,
+        unzip=False,
+        skip_data_dir=False,
+        checksum="",
+    ):
         self.data_path = self.create_data_path(name, skip_data_dir)
-        return download(self.data_path, url, file, untar, unzip)
-
-    def name(self):
-        raise NotImplementedError()
+        return download(self.data_path, url, file, untar, unzip, checksum)
 
     def lower_is_better(self):
         return True
@@ -87,6 +114,30 @@ def stddev_threshold(self):
     def get_suite_name(self) -> str:
         return self.suite.name()
 
+    def name(self):
+        raise NotImplementedError()
+
+    def description(self):
+        return ""
+
+    def notes(self) -> str:
+        return None
+
+    def unstable(self) -> str:
+        return None
+
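+    # Tag names correspond to entries in the benchmark_tags registry above.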
+    def get_tags(self) -> list[str]:
+        return []
+
+    def get_metadata(self) -> BenchmarkMetadata:
+        return BenchmarkMetadata(
+            type="benchmark",
+            description=self.description(),
+            notes=self.notes(),
+            unstable=self.unstable(),
+            tags=self.get_tags(),
+        )
+
 
 class Suite:
     def benchmarks(self) -> list[Benchmark]:
@@ -97,3 +148,6 @@ def name(self) -> str:
 
     def setup(self):
         return
+
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {}
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 4658a3414e16a..d83a0d081af57 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -8,10 +8,33 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import BenchmarkMetadata, Result
 from options import options
 from enum import Enum
 
+
+class RUNTIMES(Enum):
+    SYCL = "sycl"
+    LEVEL_ZERO = "l0"
+    UR = "ur"
+
+
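+# Human-readable name for a runtime, used in benchmark descriptions.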
+def runtime_to_name(runtime: RUNTIMES) -> str:
+    return {
+        RUNTIMES.SYCL: "SYCL",
+        RUNTIMES.LEVEL_ZERO: "Level Zero",
+        RUNTIMES.UR: "Unified Runtime",
+    }[runtime]
+
+
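+# Tag name for a runtime, matching the tags registered in benches/base.py.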
+def runtime_to_tag_name(runtime: RUNTIMES) -> str:
+    return {
+        RUNTIMES.SYCL: "SYCL",
+        RUNTIMES.LEVEL_ZERO: "L0",
+        RUNTIMES.UR: "UR",
+    }[runtime]
+
+
 class ComputeBench(Suite):
     def __init__(self, directory):
         self.directory = directory
@@ -19,6 +42,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "Compute Benchmarks"
 
+    def git_url(self) -> str:
+        return "https://github.com/intel/compute-benchmarks.git"
+
+    def git_hash(self) -> str:
+        return "b5cc46acf61766ab00da04e85bd4da4f7591eb21"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -26,8 +55,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "compute-benchmarks-repo",
-            "https://github.com/intel/compute-benchmarks.git",
-            "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba",
+            self.git_url(),
+            self.git_hash(),
         )
         build_path = create_build_path(self.directory, "compute-benchmarks-build")
 
@@ -47,13 +76,43 @@ def setup(self):
                 f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime",
             ]
 
-        print(f"{self.__class__.__name__}: Run {configure_command}")
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j")
-        run(f"cmake --build {build_path} -j", add_sycl=True)
+
+        run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True)
 
         self.built = True
 
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {
+            "SubmitKernel": BenchmarkMetadata(
+                type="group",
+                description="Measures CPU time overhead of submitting kernels through different APIs.",
+                notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
+                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
+                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
+                "Work is ongoing to reduce the overhead of the SYCL API\n",
+                tags=["submit", "micro", "SYCL", "UR", "L0"],
+            ),
+            "SinKernelGraph": BenchmarkMetadata(
+                type="group",
+                unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
+                tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
+            ),
+            "SubmitGraph": BenchmarkMetadata(
+                type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
+            ),
+        }
+
+    def enabled_runtimes(self, supported_runtimes=None):
+        # all runtimes in the RUNTIMES enum
+        runtimes = supported_runtimes or list(RUNTIMES)
+
+        # Filter out UR if not available
+        if options.ur is None:
+            runtimes = [r for r in runtimes if r != RUNTIMES.UR]
+
+        return runtimes
+
     def benchmarks(self) -> list[Benchmark]:
         if options.sycl is None:
             return []
@@ -61,11 +120,46 @@ def benchmarks(self) -> list[Benchmark]:
         if options.ur_adapter == "cuda":
             return []
 
-        benches = [
-            SubmitKernelL0(self, 0),
-            SubmitKernelL0(self, 1),
-            SubmitKernelSYCL(self, 0),
-            SubmitKernelSYCL(self, 1),
+        benches = []
+
+        # Add SubmitKernel benchmarks using loops
+        for runtime in self.enabled_runtimes():
+            for in_order_queue in [0, 1]:
+                for measure_completion in [0, 1]:
+                    benches.append(
+                        SubmitKernel(self, runtime, in_order_queue, measure_completion)
+                    )
+
+        # Add SinKernelGraph benchmarks
+        for runtime in self.enabled_runtimes():
+            for with_graphs in [0, 1]:
+                for num_kernels in [5, 100]:
+                    benches.append(
+                        GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
+                    )
+
+        # Add ULLS benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]):
+            benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
+            benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))
+
+        # Add GraphApiSubmitGraph benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL]):
+            for in_order_queue in [0, 1]:
+                for num_kernels in [4, 10, 32]:
+                    for measure_completion_time in [0, 1]:
+                        benches.append(
+                            GraphApiSubmitGraph(
+                                self,
+                                runtime,
+                                in_order_queue,
+                                num_kernels,
+                                measure_completion_time,
+                            )
+                        )
+
+        # Add other benchmarks
+        benches += [
             QueueInOrderMemcpy(self, 0, "Device", "Device", 1024),
             QueueInOrderMemcpy(self, 0, "Host", "Device", 1024),
             QueueMemcpy(self, "Device", "Device", 1024),
@@ -73,29 +167,14 @@ def benchmarks(self) -> list[Benchmark]:
             ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
             ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
             VectorSum(self),
-            MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
-            MemcpyExecute(self, 400, 8, 1024, 100, 1, 1, 1),
-            MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
-            MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
         ]
 
+        # Add UR-specific benchmarks
         if options.ur is not None:
             benches += [
-                SubmitKernelUR(self, 0, 0),
-                SubmitKernelUR(self, 1, 0),
-                SubmitKernelUR(self, 1, 1),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100),
+                MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
+                MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
+                MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
             ]
 
         return benches
@@ -130,6 +209,9 @@ def setup(self):
     def explicit_group(self):
         return ""
 
+    def description(self) -> str:
+        return ""
+
     def run(self, env_vars) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
@@ -161,6 +243,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
+                    git_url=self.bench.git_url(),
+                    git_hash=self.bench.git_hash(),
                 )
             )
         return ret
@@ -192,74 +276,52 @@ def teardown(self):
         return
 
 
-class SubmitKernelSYCL(ComputeBenchmark):
-    def __init__(self, bench, ioq):
+class SubmitKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
         self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel")
-
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_sycl SubmitKernel {order}"
-
-    def explicit_group(self):
-        return "SubmitKernel"
-
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            "--MeasureCompletion=0",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
+        self.runtime = runtime
+        self.measure_completion = measure_completion
+        super().__init__(
+            bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
+        )
 
-
-class SubmitKernelUR(ComputeBenchmark):
-    def __init__(self, bench, ioq, measureCompletion):
-        self.ioq = ioq
-        self.measureCompletion = measureCompletion
-        super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")
+    def get_tags(self):
+        return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"]
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_ur SubmitKernel {order}" + (
-            " with measure completion" if self.measureCompletion else ""
-        )
+        completion_str = " with measure completion" if self.measure_completion else ""
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}"
 
     def explicit_group(self):
-        return "SubmitKernel"
-
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            f"--MeasureCompletion={self.measureCompletion}",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
+        return (
+            "SubmitKernel"
+            if self.measure_completion == 0
+            else "SubmitKernel With Completion"
+        )
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        runtime_name = runtime_to_name(self.runtime)
 
-class SubmitKernelL0(ComputeBenchmark):
-    def __init__(self, bench, ioq):
-        self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel")
+        completion_desc = ""
+        if self.runtime == RUNTIMES.UR:
+            completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time"
 
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_l0 SubmitKernel {order}"
+        l0_specific = ""
+        if self.runtime == RUNTIMES.LEVEL_ZERO:
+            l0_specific = " Uses immediate command lists"
 
-    def explicit_group(self):
-        return "SubmitKernel"
+        return (
+            f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
+            f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}"
+        )
 
     def bin_args(self) -> list[str]:
         return [
             f"--Ioq={self.ioq}",
             "--DiscardEvents=0",
-            "--MeasureCompletion=0",
+            f"--MeasureCompletion={self.measure_completion}",
             "--iterations=100000",
             "--Profiling=0",
             "--NumKernels=10",
@@ -280,6 +342,17 @@ def name(self):
         order = "in order" if self.ioq else "out of order"
         return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL {order} queue overhead for {operation} from {self.source} to "
+            f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads."
+        )
+
+    def get_tags(self):
+        return ["memory", "submit", "latency", "SYCL", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=100000",
@@ -303,6 +376,16 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL in-order queue memory copy performance for {operation} from "
+            f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
+        )
+
+    def get_tags(self):
+        return ["memory", "latency", "SYCL", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -324,6 +407,15 @@ def __init__(self, bench, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures general SYCL queue memory copy performance from {self.source} to "
+            f"{self.destination} with {self.size} bytes per operation."
+        )
+
+    def get_tags(self):
+        return ["memory", "latency", "SYCL", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -343,10 +435,19 @@ def __init__(self, bench, type, size, placement):
     def name(self):
         return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
+            f"{self.size} bytes. Higher values (GB/s) indicate better performance."
+        )
+
     # measurement is in GB/s
     def lower_is_better(self):
         return False
 
+    def get_tags(self):
+        return ["memory", "throughput", "SYCL", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -356,6 +457,7 @@ def bin_args(self) -> list[str]:
             "--useEvents=0",
             "--contents=Zeros",
             "--multiplier=1",
+            "--vectorSize=1",
         ]
 
 
@@ -366,6 +468,15 @@ def __init__(self, bench):
     def name(self):
         return f"miscellaneous_benchmark_sycl VectorSum"
 
+    def description(self) -> str:
+        return (
+            "Measures performance of vector addition across 3D grid (512x256x256 elements) "
+            "using SYCL."
+        )
+
+    def get_tags(self):
+        return ["math", "throughput", "SYCL", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=1000",
@@ -402,6 +513,19 @@ def name(self):
             + (" without events" if not self.useEvents else "")
         )
 
+    def description(self) -> str:
+        src_type = "device" if self.srcUSM == 1 else "host"
+        dst_type = "device" if self.dstUSM == 1 else "host"
+        events = "with" if self.useEvents else "without"
+        return (
+            f"Measures multithreaded memory copy performance with {self.numThreads} threads "
+            f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
+            f"from {src_type} to {dst_type} memory {events} events."
+        )
+
+    def get_tags(self):
+        return ["memory", "latency", "UR", "micro"]
+
     def bin_args(self) -> list[str]:
         return [
             "--Ioq=1",
@@ -417,12 +541,6 @@ def bin_args(self) -> list[str]:
         ]
 
 
-class RUNTIMES(Enum):
-    SYCL = "sycl"
-    LEVEL_ZERO = "l0"
-    UR = "ur"
-
-
 class GraphApiSinKernelGraph(ComputeBenchmark):
     def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
         self.withGraphs = withGraphs
@@ -435,9 +553,29 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
     def explicit_group(self):
         return f"SinKernelGraph {self.numKernels}"
 
+    def description(self) -> str:
+        execution = "using graphs" if self.withGraphs else "without graphs"
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"sin kernels {execution}. Tests overhead and benefits of graph-based execution."
+        )
+
     def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}"
 
+    def unstable(self) -> str:
+        return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
+
+    def get_tags(self):
+        return [
+            "graph",
+            runtime_to_tag_name(self.runtime),
+            "proxy",
+            "submit",
+            "memory",
+            "latency",
+        ]
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -448,26 +586,115 @@ def bin_args(self) -> list[str]:
         ]
 
 
-class GraphApiSubmitExecGraph(ComputeBenchmark):
-    def __init__(self, bench, ioq, submit, numKernels):
-        self.ioq = ioq
-        self.submit = submit
+class GraphApiSubmitGraph(ComputeBenchmark):
+    def __init__(
+        self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime
+    ):
+        self.inOrderQueue = inOrderQueue
         self.numKernels = numKernels
-        super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph")
+        self.runtime = runtime
+        self.measureCompletionTime = measureCompletionTime
+        super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph")
+
+    def explicit_group(self):
+        return f"SubmitGraph {self.numKernels}"
+
+    def description(self) -> str:
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution."
+        )
+
+    def name(self):
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+
+    def get_tags(self):
+        return [
+            "graph",
+            runtime_to_tag_name(self.runtime),
+            "micro",
+            "submit",
+            "latency",
+        ]
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--NumKernels={self.numKernels}",
+            f"--MeasureCompletionTime={self.measureCompletionTime}",
+            f"--InOrderQueue={self.inOrderQueue}",
+            "--Profiling=0",
+            "--KernelExecutionTime=1",
+        ]
+
+
+class UllsEmptyKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
+        self.wgc = wgc
+        self.wgs = wgs
+        self.runtime = runtime
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel")
+
+    def explicit_group(self):
+        return f"EmptyKernel {self.wgc} {self.wgs}"
+
+    def description(self) -> str:
+        return ""
 
     def name(self):
-        return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}"
+        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--wgs={self.wgs}",
+            f"--wgc={self.wgs}",
+        ]
+
+
+class UllsKernelSwitch(ComputeBenchmark):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        count,
+        kernelTime,
+        barrier,
+        hostVisible,
+        ioq,
+        ctrBasedEvents,
+    ):
+        self.count = count
+        self.kernelTime = kernelTime
+        self.barrier = barrier
+        self.hostVisible = hostVisible
+        self.ctrBasedEvents = ctrBasedEvents
+        self.runtime = runtime
+        self.ioq = ioq
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch")
 
     def explicit_group(self):
-        if self.submit:
-            return "SubmitGraph"
-        else:
-            return "ExecGraph"
+        return f"KernelSwitch {self.count} {self.kernelTime}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
+
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
 
     def bin_args(self) -> list[str]:
         return [
-            "--iterations=100",
-            f"--measureSubmit={self.submit}",
+            "--iterations=1000",
+            f"--count={self.count}",
+            f"--kernelTime={self.kernelTime}",
+            f"--barrier={self.barrier}",
+            f"--hostVisible={self.hostVisible}",
             f"--ioq={self.ioq}",
-            f"--numKernels={self.numKernels}",
+            f"--ctrBasedEvents={self.ctrBasedEvents}",
         ]
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 6524c95a9f56f..86d41ed525292 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -8,10 +8,10 @@
 from pathlib import Path
 from utils.utils import download, git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 
 
@@ -25,6 +25,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "llama.cpp bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/ggerganov/llama.cpp"
+
+    def git_hash(self) -> str:
+        return "1ee9eea094fe5846c7d8d770aa7caa749d246b23"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -32,8 +38,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "llamacpp-repo",
-            "https://github.com/ggerganov/llama.cpp",
-            "1ee9eea094fe5846c7d8d770aa7caa749d246b23",
+            self.git_url(),
+            self.git_hash(),
         )
 
         self.models_dir = os.path.join(self.directory, "models")
@@ -43,6 +49,7 @@ def setup(self):
             self.models_dir,
             "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
             "Phi-3-mini-4k-instruct-q4.gguf",
+            checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
         )
 
         self.oneapi = get_oneapi()
@@ -62,11 +69,11 @@ def setup(self):
             f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
             f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
         ]
-        print(f"{self.__class__.__name__}: Run {configure_command}")
+
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j")
+
         run(
-            f"cmake --build {self.build_path} -j",
+            f"cmake --build {self.build_path} -j {options.build_jobs}",
             add_sycl=True,
             ld_library=self.oneapi.ld_libraries(),
         )
@@ -92,6 +99,17 @@ def setup(self):
     def name(self):
         return f"llama.cpp"
 
+    def description(self) -> str:
+        return (
+            "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
+            "Runs both prompt processing (initial context processing) and text generation benchmarks with "
+            "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+            "quantized model and leverages SYCL with oneDNN for acceleration."
+        )
+
+    def get_tags(self):
+        return ["SYCL", "application", "inference", "throughput"]
+
     def lower_is_better(self):
         return False
 
@@ -130,6 +148,8 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
+                    git_url=self.bench.git_url(),
+                    git_hash=self.bench.git_hash(),
                 )
             )
         return results
diff --git a/devops/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/benches/result.py
deleted file mode 100644
index 52a098d91c24a..0000000000000
--- a/devops/scripts/benchmarks/benches/result.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-from dataclasses import dataclass
-from typing import Optional
-from dataclasses_json import dataclass_json
-from datetime import datetime
-
-
-@dataclass_json
-@dataclass
-class Result:
-    label: str
-    value: float
-    command: str
-    env: str
-    stdout: str
-    passed: bool = True
-    unit: str = ""
-    explicit_group: str = ""
-    # stddev can be optionally set by the benchmark,
-    # if not set, it will be calculated automatically.
-    stddev: float = 0.0
-    # values below should not be set by the benchmark
-    name: str = ""
-    lower_is_better: bool = True
-    git_hash: str = ""
-    date: Optional[datetime] = None
-    suite: str = "Unknown"
-
-
-@dataclass_json
-@dataclass
-class BenchmarkRun:
-    results: list[Result]
-    name: str = "This PR"
-    git_hash: str = ""
-    date: datetime = None
diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
index f7cf571a7ecd7..9854c92d338fc 100644
--- a/devops/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -8,7 +8,7 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from options import options
 
 
@@ -23,6 +23,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "SYCL-Bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/unisa-hpc/sycl-bench.git"
+
+    def git_hash(self) -> str:
+        return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -31,8 +37,8 @@ def setup(self):
         repo_path = git_clone(
             self.directory,
             "sycl-bench-repo",
-            "https://github.com/mateuszpn/sycl-bench.git",
-            "1e6ab2cfd004a72c5336c26945965017e06eab71",
+            self.git_url(),
+            self.git_hash(),
         )
 
         configure_command = [
@@ -51,7 +57,7 @@ def setup(self):
             ]
 
         run(configure_command, add_sycl=True)
-        run(f"cmake --build {build_path} -j", add_sycl=True)
+        run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True)
 
         self.built = True
 
@@ -65,14 +71,14 @@ def benchmarks(self) -> list[Benchmark]:
             DagTaskS(self),
             HostDevBandwidth(self),
             LocalMem(self),
-            Pattern_L2(self),
-            Reduction(self),
+            # Pattern_L2(self), # validation failure
+            # Reduction(self), # validation failure
             ScalarProd(self),
             SegmentReduction(self),
-            UsmAccLatency(self),
+            # UsmAccLatency(self), # validation failure
             UsmAllocLatency(self),
-            UsmInstrMix(self),
-            UsmPinnedOverhead(self),
+            # UsmInstrMix(self), # validation failure
+            # UsmPinnedOverhead(self), # validation failure
             VecAdd(self),
             # *** sycl-bench single benchmarks
             # TwoDConvolution(self), # run time < 1ms
@@ -82,20 +88,20 @@ def benchmarks(self) -> list[Benchmark]:
             Atax(self),
             # Atomic_reduction(self), # run time < 1ms
             Bicg(self),
-            Correlation(self),
-            Covariance(self),
-            Gemm(self),
-            Gesumv(self),
-            Gramschmidt(self),
+            # Correlation(self), # validation failure
+            # Covariance(self), # validation failure
+            # Gemm(self), # validation failure
+            # Gesumv(self), # validation failure
+            # Gramschmidt(self), # validation failure
             KMeans(self),
             LinRegCoeff(self),
             # LinRegError(self), # run time < 1ms
-            MatmulChain(self),
+            # MatmulChain(self), # validation failure
             MolDyn(self),
-            Mvt(self),
+            # Mvt(self), # validation failure
             Sf(self),
-            Syr2k(self),
-            Syrk(self),
+            # Syr2k(self), # validation failure
+            # Syrk(self), # validation failure
         ]
 
 
@@ -105,7 +111,6 @@ def __init__(self, bench, name, test):
         self.bench = bench
         self.bench_name = name
         self.test = test
-        self.done = False
 
     def bin_args(self) -> list[str]:
         return []
@@ -113,16 +118,26 @@ def bin_args(self) -> list[str]:
     def extra_env_vars(self) -> dict:
         return {}
 
+    def get_tags(self):
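+        # Derive tags heuristically from the benchmark binary name.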
+        base_tags = ["SYCL", "micro"]
+        if "Memory" in self.bench_name or "mem" in self.bench_name.lower():
+            base_tags.append("memory")
+        if "Reduction" in self.bench_name:
+            base_tags.append("math")
+        if "Bandwidth" in self.bench_name:
+            base_tags.append("throughput")
+        if "Latency" in self.bench_name:
+            base_tags.append("latency")
+        return base_tags
+
     def setup(self):
         self.benchmark_bin = os.path.join(
             self.directory, "sycl-bench-build", self.bench_name
         )
 
     def run(self, env_vars) -> list[Result]:
-        if self.done:
-            return
         self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
-        print(f"{self.__class__.__name__}: Results in {self.outputfile}")
+
         command = [
             f"{self.benchmark_bin}",
             f"--warmup-run",
@@ -143,25 +158,27 @@ def run(self, env_vars) -> list[Result]:
                 if not row[0].startswith("#"):
                     res_list.append(
                         Result(
-                            label=row[0],
+                            label=f"{self.name()} {row[0]}",
                             value=float(row[12]) * 1000,  # convert to ms
                             passed=(row[1] == "PASS"),
                             command=command,
                             env=env_vars,
                             stdout=row,
                             unit="ms",
+                            git_url=self.bench.git_url(),
+                            git_hash=self.bench.git_hash(),
                         )
                     )
-        self.done = True
-        return res_list
 
-    def teardown(self):
-        print(f"Removing {self.outputfile}...")
         os.remove(self.outputfile)
-        return
+
+        return res_list
 
     def name(self):
-        return self.test
+        return f"{self.bench.name()} {self.test}"
+
+    def teardown(self):
+        return
 
 
 # multi benchmarks
diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
index 06eac12b25344..ad1e8c9e57735 100644
--- a/devops/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -6,7 +6,7 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import BenchmarkMetadata, Result
 from utils.utils import run, create_build_path
 from options import options
 import os
@@ -19,35 +19,56 @@ def __init__(self):
     def setup(self):
         return
 
+    def name(self) -> str:
+        return "Test Suite"
+
     def benchmarks(self) -> list[Benchmark]:
         bench_configs = [
-            ("Memory Bandwidth", 2000, 200, "Foo Group"),
-            ("Latency", 100, 20, "Bar Group"),
-            ("Throughput", 1500, 150, "Foo Group"),
-            ("FLOPS", 3000, 300, "Foo Group"),
-            ("Cache Miss Rate", 250, 25, "Bar Group"),
+            ("Memory Bandwidth", 2000, 200, "Foo Group", None, None),
+            ("Latency", 100, 20, "Bar Group", "A Latency test note!", None),
+            ("Throughput", 1500, 150, "Foo Group", None, None),
+            ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"),
+            ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"),
         ]
 
         result = []
-        for base_name, base_value, base_diff, group in bench_configs:
+        for base_name, base_value, base_diff, group, notes, unstable in bench_configs:
             for variant in range(6):
                 value_multiplier = 1.0 + (variant * 0.2)
                 name = f"{base_name} {variant+1}"
                 value = base_value * value_multiplier
                 diff = base_diff * value_multiplier
 
-                result.append(TestBench(name, value, diff, group))
+                result.append(
+                    TestBench(self, name, value, diff, group, notes, unstable)
+                )
 
         return result
 
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {
+            "Foo Group": BenchmarkMetadata(
+                type="group",
+                description="This is a test benchmark for Foo Group.",
+                notes="This is a test note for Foo Group.\n" "Look, multiple lines!",
+            ),
+            "Bar Group": BenchmarkMetadata(
+                type="group",
+                description="This is a test benchmark for Bar Group.",
+                unstable="This is an unstable note for Bar Group.",
+            ),
+        }
+
 
 class TestBench(Benchmark):
-    def __init__(self, name, value, diff, group=""):
+    def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None):
+        super().__init__("", suite)
         self.bname = name
         self.value = value
         self.diff = diff
         self.group = group
-        super().__init__("")
+        self.notes_text = notes
+        self.unstable_text = unstable
 
     def name(self):
         return self.bname
@@ -58,6 +79,15 @@ def lower_is_better(self):
     def setup(self):
         return
 
+    def description(self) -> str:
+        return f"This is a test benchmark for {self.bname}."
+
+    def notes(self) -> str:
+        return self.notes_text
+
+    def unstable(self) -> str:
+        return self.unstable_text
+
     def run(self, env_vars) -> list[Result]:
         random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
         return [
@@ -65,7 +95,7 @@ def run(self, env_vars) -> list[Result]:
                 label=self.name(),
                 explicit_group=self.group,
                 value=random_value,
-                command="",
+                command=["test", "--arg1", "foo"],
                 env={"A": "B"},
                 stdout="no output",
                 unit="ms",
diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index c7b767f02bbe1..f0b92777dd2f8 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -6,10 +6,10 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 import csv
 import io
@@ -22,8 +22,6 @@ def isUMFAvailable():
 class UMFSuite(Suite):
     def __init__(self, directory):
         self.directory = directory
-        if not isUMFAvailable():
-            print("UMF not provided. Related benchmarks will not run")
 
     def name(self) -> str:
         return "UMF"
@@ -76,6 +74,9 @@ def setup(self):
 
         self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
 
+    def get_tags(self):
+        return ["UMF", "allocation", "latency", "micro"]
+
     def run(self, env_vars) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
index b7d06cbe4a3a2..493298dea8b10 100644
--- a/devops/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -7,10 +7,10 @@
 import shutil
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import shutil
 
 import os
@@ -26,6 +26,12 @@ def __init__(self, directory):
     def name(self) -> str:
         return "Velocity Bench"
 
+    def git_url(self) -> str:
+        return "https://github.com/oneapi-src/Velocity-Bench/"
+
+    def git_hash(self) -> str:
+        return "b22215c16f789100449c34bf4eaa3fb178983d69"
+
     def setup(self):
         if options.sycl is None:
             return
@@ -33,8 +39,8 @@ def setup(self):
         self.repo_path = git_clone(
             self.directory,
             "velocity-bench-repo",
-            "https://github.com/oneapi-src/Velocity-Bench/",
-            "b22215c16f789100449c34bf4eaa3fb178983d69",
+            self.git_url(),
+            self.git_hash(),
         )
 
     def benchmarks(self) -> list[Benchmark]:
@@ -101,7 +107,7 @@ def setup(self):
 
         run(configure_command, {"CC": "clang", "CXX": "clang++"}, add_sycl=True)
         run(
-            f"cmake --build {build_path} -j",
+            f"cmake --build {build_path} -j {options.build_jobs}",
             add_sycl=True,
             ld_library=self.ld_libraries(),
         )
@@ -115,6 +121,12 @@ def extra_env_vars(self) -> dict:
     def parse_output(self, stdout: str) -> float:
         raise NotImplementedError()
 
+    def description(self) -> str:
+        return ""
+
+    def get_tags(self):
+        return ["SYCL", "application"]
+
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
@@ -133,6 +145,8 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
+                git_url=self.vb.git_url(),
+                git_hash=self.vb.git_hash(),
             )
         ]
 
@@ -147,6 +161,12 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Hashtable"
 
+    def description(self) -> str:
+        return (
+            "Measures hash table search performance using an efficient lock-free algorithm with linear probing. "
+            "Reports throughput in millions of keys processed per second. Higher values indicate better performance."
+        )
+
     def bin_args(self) -> list[str]:
         return ["--no-verify"]
 
@@ -162,6 +182,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse keys per second from benchmark output."
             )
 
+    def get_tags(self):
+        return ["SYCL", "application", "throughput"]
+
 
 class Bitcracker(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -170,6 +193,13 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Bitcracker"
 
+    def description(self) -> str:
+        return (
+            "Password-cracking application for BitLocker-encrypted memory units. "
+            "Uses dictionary attack to find user or recovery passwords. "
+            "Measures total time required to process 60000 passwords."
+        )
+
     def bin_args(self) -> list[str]:
         self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass")
 
@@ -193,6 +223,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ["SYCL", "application", "throughput"]
+
 
 class SobelFilter(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -204,11 +237,19 @@ def download_deps(self):
             "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=",
             "sobel_filter_data.tgz",
             untar=True,
+            checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66",
         )
 
     def name(self):
         return "Velocity-Bench Sobel Filter"
 
+    def description(self) -> str:
+        return (
+            "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter "
+            "to reduce edge artifacts. Processes a large 32K x 32K image and measures "
+            "the time required to apply the filter."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-i",
@@ -231,6 +272,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ["SYCL", "application", "image", "throughput"]
+
 
 class QuickSilver(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -249,6 +293,13 @@ def run(self, env_vars) -> list[Result]:
     def name(self):
         return "Velocity-Bench QuickSilver"
 
+    def description(self) -> str:
+        return (
+            "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. "
+            "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. "
+            "Reports a figure of merit in MMS/CTT where higher values indicate better performance."
+        )
+
     def lower_is_better(self):
         return False
 
@@ -271,6 +322,9 @@ def parse_output(self, stdout: str) -> float:
                 "{self.__class__.__name__}: Failed to parse benchmark output."
             )
 
+    def get_tags(self):
+        return ["SYCL", "application", "simulation", "throughput"]
+
 
 class Easywave(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -279,14 +333,22 @@ def __init__(self, vb: VelocityBench):
     def download_deps(self):
         self.download(
             "easywave",
-            "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz",
+            "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz",
             "examples.tar.gz",
             untar=True,
+            checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1",
         )
 
     def name(self):
         return "Velocity-Bench Easywave"
 
+    def description(self) -> str:
+        return (
+            "A tsunami wave simulator used for researching tsunami generation and wave propagation. "
+            "Measures the elapsed time in milliseconds to simulate a specified tsunami event "
+            "based on real-world data."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-grid",
@@ -327,6 +389,9 @@ def parse_output(self, stdout: str) -> float:
             os.path.join(options.benchmark_cwd, "easywave.log")
         )
 
+    def get_tags(self):
+        return ["SYCL", "application", "simulation"]
+
 
 class CudaSift(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -341,6 +406,13 @@ def download_deps(self):
     def name(self):
         return "Velocity-Bench CudaSift"
 
+    def description(self) -> str:
+        return (
+            "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm "
+            "for detecting, describing, and matching local features in images. "
+            "Measures average processing time in milliseconds."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout)
         if match:
@@ -348,6 +420,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ["SYCL", "application", "image"]
+
 
 class DLCifar(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -364,6 +439,7 @@ def download_deps(self):
             "cifar-10-binary.tar.gz",
             untar=True,
             skip_data_dir=True,
+            checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814",
         )
         return
 
@@ -382,6 +458,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-cifar"
 
+    def description(self) -> str:
+        return (
+            "Deep learning image classification workload based on the CIFAR-10 dataset "
+            "of 60,000 32x32 color images in 10 classes. Uses neural networks to "
+            "classify input images and measures total calculation time."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(
             r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout
@@ -391,6 +474,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ["SYCL", "application", "inference", "image"]
+
 
 class DLMnist(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -407,6 +493,7 @@ def download_deps(self):
             "train-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee",
         )
         self.download(
             "datasets",
@@ -414,6 +501,7 @@ def download_deps(self):
             "train-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00",
         )
         self.download(
             "datasets",
@@ -421,6 +509,7 @@ def download_deps(self):
             "t10k-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c",
         )
         self.download(
             "datasets",
@@ -428,6 +517,7 @@ def download_deps(self):
             "t10k-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d",
         )
 
     def extra_cmake_args(self):
@@ -445,6 +535,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-mnist"
 
+    def description(self) -> str:
+        return (
+            "Digit recognition based on the MNIST database, one of the oldest and most popular "
+            "databases of handwritten digits. Uses neural networks to identify digits "
+            "and measures total calculation time."
+        )
+
     def bin_args(self):
         return ["-conv_algo", "ONEDNN_AUTO"]
 
@@ -465,6 +562,9 @@ def parse_output(self, stdout: str) -> float:
         else:
             raise ValueError("Failed to parse benchmark output.")
 
+    def get_tags(self):
+        return ["SYCL", "application", "inference", "image"]
+
 
 class SVM(VelocityBase):
     def __init__(self, vb: VelocityBench):
@@ -488,6 +588,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench svm"
 
+    def description(self) -> str:
+        return (
+            "Implementation of Support Vector Machine, a popular classical machine learning technique. "
+            "Uses supervised learning models with associated algorithms to analyze data "
+            "for classification and regression analysis. Measures total elapsed time."
+        )
+
     def bin_args(self):
         return [
             f"{self.code_path}/a9a",
@@ -500,3 +607,6 @@ def parse_output(self, stdout: str) -> float:
             return float(match.group(1))
         else:
             raise ValueError("Failed to parse benchmark output.")
+
+    def get_tags(self):
+        return ["SYCL", "application", "inference"]
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 7902aa4f04c35..0b80c54ad7393 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -6,14 +6,14 @@
 import os
 import json
 from pathlib import Path
-from benches.result import Result, BenchmarkRun
+import socket
+from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run
 
 
 class BenchmarkHistory:
-    benchmark_run_index_max = 0
     runs = []
 
     def __init__(self, dir):
@@ -35,42 +35,55 @@ def load(self, n: int):
         # Get all JSON files in the results directory
         benchmark_files = list(results_dir.glob("*.json"))
 
-        # Extract index numbers and sort files by index number
-        def extract_index(file_path: Path) -> int:
+        # Extract timestamp and sort files by it
+        def extract_timestamp(file_path: Path) -> str:
             try:
-                return int(file_path.stem.split("_")[0])
-            except (IndexError, ValueError):
-                return -1
+                # Filenames are "<save_name>_<YYYYMMDD>_<HHMMSS>.json"; join the last
+                # two fields so runs from different days sort in the right order.
+                return "_".join(file_path.stem.split("_")[-2:])
+            except IndexError:
+                return ""
 
-        benchmark_files = [
-            file for file in benchmark_files if extract_index(file) != -1
-        ]
-        benchmark_files.sort(key=extract_index)
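+        # Sort newest-first so the slice below loads the n most recent runs.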
+        benchmark_files.sort(key=extract_timestamp, reverse=True)
 
         # Load the first n benchmark files
         benchmark_runs = []
-        for file_path in benchmark_files[n::-1]:
+        for file_path in benchmark_files[:n]:
             benchmark_run = self.load_result(file_path)
             if benchmark_run:
                 benchmark_runs.append(benchmark_run)
 
-        if benchmark_files:
-            self.benchmark_run_index_max = extract_index(benchmark_files[-1])
-
         self.runs = benchmark_runs
 
     def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
-            result = run("git rev-parse --short HEAD")
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            result = run("git rev-parse --short HEAD", cwd=script_dir)
             git_hash = result.stdout.decode().strip()
+
+            # Get the GitHub repo URL from git remote
+            remote_result = run("git remote get-url origin", cwd=script_dir)
+            remote_url = remote_result.stdout.decode().strip()
+
+            # Convert SSH or HTTPS URL to owner/repo format
+            if remote_url.startswith("git@github.com:"):
+                # SSH format: git@github.com:owner/repo.git
+                github_repo = remote_url.split("git@github.com:")[1].rstrip(".git")
+            elif remote_url.startswith("https://github.com/"):
+                # HTTPS format: https://github.com/owner/repo.git
+                github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
+            else:
+                github_repo = None
+
         except:
             git_hash = "unknown"
+            github_repo = None
 
         return BenchmarkRun(
             name=name,
             git_hash=git_hash,
+            github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
+            hostname=socket.gethostname(),
         )
 
     def save(self, save_name, results: list[Result], to_file=True):
@@ -84,12 +97,9 @@ def save(self, save_name, results: list[Result], to_file=True):
         results_dir = Path(os.path.join(self.dir, "results"))
         os.makedirs(results_dir, exist_ok=True)
 
-        self.benchmark_run_index_max += 1
-        file_path = Path(
-            os.path.join(
-                results_dir, f"{self.benchmark_run_index_max}_{save_name}.json"
-            )
-        )
+        # Use formatted timestamp for the filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
         with file_path.open("w") as file:
             json.dump(serialized, file, indent=4)
         print(f"Benchmark results saved to {file_path}")
@@ -120,6 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]):
             name=first_run.name,
             git_hash="average",
             date=first_run.date,  # should this be different?
+            hostname=first_run.hostname,
         )
 
         return average_benchmark_run
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 4ad90b39b9001..859aa96e50903 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -27,23 +27,27 @@
 
 
 def run_iterations(
-    benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]]
+    benchmark: Benchmark,
+    env_vars,
+    iters: int,
+    results: dict[str, list[Result]],
+    failures: dict[str, str],
 ):
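+    # `failures` maps a benchmark name or result label to a short failure reason.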
     for iter in range(iters):
-        print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True)
+        print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
         bench_results = benchmark.run(env_vars)
         if bench_results is None:
-            print(f"did not finish (OK for sycl-bench).")
+            failures[benchmark.name()] = "benchmark produced no results!"
             break
 
         for bench_result in bench_results:
-            # TODO: report failures in markdown/html ?
             if not bench_result.passed:
-                print(f"complete ({bench_result.label}: verification FAILED)")
+                failures[bench_result.label] = "verification failed"
+                print(f"complete ({bench_result.label}: verification failed).")
                 continue
 
             print(
-                f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
+                f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
             )
 
             bench_result.name = bench_result.label
@@ -132,6 +136,18 @@ def process_results(
     return valid_results, processed
 
 
+def collect_metadata(suites):
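+    """Gather metadata from each suite and its benchmarks, keyed by name."""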
+    metadata = {}
+
+    for s in suites:
+        metadata.update(s.additionalMetadata())
+        suite_benchmarks = s.benchmarks()
+        for benchmark in suite_benchmarks:
+            metadata[benchmark.name()] = benchmark.get_metadata()
+
+    return metadata
+
+
 def main(directory, additional_env_vars, save_name, compare_names, filter):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
 
@@ -142,20 +158,21 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         options.extra_ld_libraries.extend(cr.ld_libraries())
         options.extra_env_vars.update(cr.env_vars())
 
-    suites = (
-        [
-            ComputeBench(directory),
-            VelocityBench(directory),
-            SyclBench(directory),
-            LlamaCppBench(directory),
-            UMFSuite(directory),
-            # TestSuite()
-        ]
-        if not options.dry_run
-        else []
-    )
+    suites = [
+        ComputeBench(directory),
+        VelocityBench(directory),
+        SyclBench(directory),
+        LlamaCppBench(directory),
+        UMFSuite(directory),
+        TestSuite(),
+    ]
+
+    # In a dry run, no suites are built or executed.
+    if options.dry_run:
+        suites = []
 
     benchmarks = []
+    failures = {}
 
     for s in suites:
         suite_benchmarks = s.benchmarks()
@@ -170,25 +187,26 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             print(f"Setting up {type(s).__name__}")
             try:
                 s.setup()
-            except:
+            except Exception as e:
+                failures[s.name()] = f"Suite setup failure: {e}"
                 print(f"{type(s).__name__} setup failed. Benchmarks won't be added.")
             else:
                 print(f"{type(s).__name__} setup complete.")
                 benchmarks += suite_benchmarks
 
-    for b in benchmarks:
-        print(b.name())
-
     for benchmark in benchmarks:
         try:
-            print(f"Setting up {benchmark.name()}... ")
+            if options.verbose:
+                print(f"Setting up {benchmark.name()}... ")
             benchmark.setup()
-            print(f"{benchmark.name()} setup complete.")
+            if options.verbose:
+                print(f"{benchmark.name()} setup complete.")
 
         except Exception as e:
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark setup failure: {e}"
                 print(f"failed: {e}")
 
     results = []
@@ -199,7 +217,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             processed: list[Result] = []
             for _ in range(options.iterations_stddev):
                 run_iterations(
-                    benchmark, merged_env_vars, options.iterations, intermediate_results
+                    benchmark,
+                    merged_env_vars,
+                    options.iterations,
+                    intermediate_results,
+                    failures,
                 )
                 valid, processed = process_results(
                     intermediate_results, benchmark.stddev_threshold()
@@ -211,12 +233,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark run failure: {e}"
                 print(f"failed: {e}")
 
     for benchmark in benchmarks:
-        print(f"tearing down {benchmark.name()}... ", end="", flush=True)
+        # teardown output is rarely useful, so only print it in verbose mode
+        if options.verbose:
+            print(f"tearing down {benchmark.name()}... ", flush=True)
         benchmark.teardown()
-        print("complete.")
+        if options.verbose:
+            print("{benchmark.name()} teardown complete.")
 
     this_name = options.current_run_name
     chart_data = {}
@@ -224,7 +250,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    history = BenchmarkHistory(directory)
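+    # --results-dir overrides where the benchmark history (results/*.json) is kept.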
+    results_dir = directory
+    if options.custom_results_dir:
+        results_dir = Path(options.custom_results_dir)
+    history = BenchmarkHistory(results_dir)
     # limit how many files we load.
     # should this be configurable?
     history.load(1000)
@@ -297,7 +326,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument(
         "--adapter",
         type=str,
-        help="Options to build the Unified Runtime as part of the benchmark",
+        help="Unified Runtime adapter to use.",
         default="level_zero",
     )
     parser.add_argument(
@@ -305,6 +334,11 @@ def validate_and_parse_env_args(env_args):
         help="Do not rebuild the benchmarks from scratch.",
         action="store_true",
     )
+    parser.add_argument(
+        "--redownload",
+        help="Always download benchmark data dependencies, even if they already exist.",
+        action="store_true",
+    )
     parser.add_argument(
         "--env",
         type=str,
@@ -423,6 +457,18 @@ def validate_and_parse_env_args(env_args):
         help="Directory for cublas library",
         default=None,
     )
+    parser.add_argument(
+        "--results-dir",
+        type=str,
+        help="Specify a custom results directory",
+        default=options.custom_results_dir,
+    )
+    parser.add_argument(
+        "--build-jobs",
+        type=int,
+        help="Number of build jobs to run simultaneously",
+        default=options.build_jobs,
+    )
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -430,6 +476,7 @@ def validate_and_parse_env_args(env_args):
     options.workdir = args.benchmark_directory
     options.verbose = args.verbose
     options.rebuild = not args.no_rebuild
+    options.redownload = args.redownload
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
@@ -448,6 +495,8 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
+    options.custom_results_dir = args.results_dir
+    options.build_jobs = args.build_jobs
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index 2e92675264544..78eda7ae3c88e 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
+import multiprocessing
 
 
 class Compare(Enum):
@@ -21,6 +22,7 @@ class Options:
     ur_adapter: str = None
     umf: str = None
     rebuild: bool = True
+    redownload: bool = False
     benchmark_cwd: str = "INVALID"
     timeout: float = 600
     iterations: int = 3
@@ -40,6 +42,7 @@ class Options:
     compute_runtime_tag: str = "25.05.32567.12"
     build_igc: bool = False
     current_run_name: str = "This PR"
-
+    custom_results_dir: str = None
+    build_jobs: int = multiprocessing.cpu_count()
+
 
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 4ba395bc3aac6..e9c1f135b70cd 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -11,7 +11,7 @@
 from collections import defaultdict
 from dataclasses import dataclass
 import matplotlib.dates as mdates
-from benches.result import BenchmarkRun, Result
+from utils.result import BenchmarkRun, Result
 import numpy as np
 from string import Template
 
diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
index dd6711cec6365..84af97fc51adb 100644
--- a/devops/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections
-from benches.result import Result
+from utils.result import Result
 from options import options, MarkdownSize
 import ast
 
diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
index 74d8ff4eb5345..e617168f37a76 100644
--- a/devops/scripts/benchmarks/utils/compute_runtime.py
+++ b/devops/scripts/benchmarks/utils/compute_runtime.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -62,7 +62,7 @@ def build_gmmlib(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.gmmlib_build} -j")
+        run(f"cmake --build {self.gmmlib_build} -j {options.build_jobs}")
         run(f"cmake --install {self.gmmlib_build}")
         return self.gmmlib_install
 
@@ -87,7 +87,7 @@ def build_level_zero(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.level_zero_build} -j")
+        run(f"cmake --build {self.level_zero_build} -j {options.build_jobs}")
         run(f"cmake --install {self.level_zero_build}")
         return self.level_zero_install
 
@@ -142,8 +142,11 @@ def build_igc(self, repo, commit):
         ]
         run(configure_command)
 
-        # set timeout to 30min. IGC takes A LONG time to build if building from scratch.
-        run(f"cmake --build {self.igc_build} -j", timeout=600 * 3)
+        # set timeout to 2h. IGC takes A LONG time to build if building from scratch.
+        run(
+            f"cmake --build {self.igc_build} -j {options.build_jobs}",
+            timeout=60 * 60 * 2,
+        )
         # cmake --install doesn't work...
         run("make install", cwd=self.igc_build)
         return self.igc_install
@@ -214,7 +217,7 @@ def build_compute_runtime(self):
             configure_command.append(f"-DIGC_DIR={self.igc}")
 
         run(configure_command)
-        run(f"cmake --build {self.compute_runtime_build} -j")
+        run(f"cmake --build {self.compute_runtime_build} -j {options.build_jobs}")
         return self.compute_runtime_build
 
 
diff --git a/devops/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
similarity index 78%
rename from devops/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/utils/oneapi.py
index 0547f6646e39e..fc27b9a8b2d3e 100644
--- a/devops/scripts/benchmarks/benches/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -7,29 +7,33 @@
 from utils.utils import download, run
 from options import options
 import os
+import hashlib
 
 
 class OneAPI:
-    # random unique number for benchmark oneAPI installation
-    ONEAPI_BENCHMARK_INSTANCE_ID = 987654
-
     def __init__(self):
         self.oneapi_dir = os.path.join(options.workdir, "oneapi")
         Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
-        # delete if some option is set?
+        self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
 
         # can we just hardcode these links?
         self.install_package(
             "dnnl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
         )
         self.install_package(
             "mkl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
         )
         return
 
-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
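+        # Derive a stable installer instance id from the install path; md5 is used
+        # here only as a cheap id generator, not for any security purpose.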
+        hash_object = hashlib.md5(path.encode())
+        return hash_object.hexdigest()
+
+    def install_package(self, name, url, checksum):
         package_path = os.path.join(self.oneapi_dir, name)
         if Path(package_path).exists():
             print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
             )
             return
 
-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
         try:
             print(f"installing {name}")
             run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
             )
         except:
             print("oneAPI installation likely exists already")
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
new file mode 100644
index 0000000000000..14a2ffa905f34
--- /dev/null
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2024-2025 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from dataclasses import dataclass, field
+from dataclasses_json import config, dataclass_json
+from datetime import datetime
+
+
+@dataclass_json
+@dataclass
+class Result:
+    label: str
+    value: float
+    command: list[str]
+    env: dict[str, str]
+    stdout: str
+    passed: bool = True
+    unit: str = ""
+    explicit_group: str = ""
+    # stddev can be optionally set by the benchmark,
+    # if not set, it will be calculated automatically.
+    stddev: float = 0.0
+    git_url: str = ""
+    git_hash: str = ""
+    # values below should not be set by the benchmark
+    name: str = ""
+    lower_is_better: bool = True
+    suite: str = "Unknown"
+
+
+@dataclass_json
+@dataclass
+class BenchmarkRun:
+    results: list[Result]
+    name: str = "This PR"
+    hostname: str = "Unknown"
+    git_hash: str = ""
+    github_repo: str = None
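+    # The run date is (de)serialized as an ISO-8601 string via the codecs below.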
+    date: datetime = field(
+        default=None,
+        metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
+    )
+
+
+@dataclass_json
+@dataclass
+class BenchmarkTag:
+    name: str
+    description: str = ""
+
+
+@dataclass_json
+@dataclass
+class BenchmarkMetadata:
+    type: str = "benchmark"  # or 'group'
+    description: str = None
+    notes: str = None
+    unstable: str = None
+    tags: list[str] = field(default_factory=list)
+
+
+@dataclass_json
+@dataclass
+class BenchmarkOutput:
+    runs: list[BenchmarkRun]
+    metadata: dict[str, BenchmarkMetadata]
+    tags: dict[str, BenchmarkTag]
+    default_compare_names: list[str] = field(default_factory=list)
diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
index 3a516e8d724f7..54f2ef7fb9c1f 100644
--- a/devops/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -12,6 +12,7 @@
 import urllib  # nosec B404
 from options import options
 from pathlib import Path
+import hashlib
 
 
 def run(
@@ -45,6 +46,12 @@ def run(
 
         env.update(env_vars)
 
+        if options.verbose:
+            command_str = " ".join(command)
+            env_str = " ".join(f"{key}={value}" for key, value in env_vars.items())
+            full_command_str = f"{env_str} {command_str}".strip()
+            print(f"Running: {full_command_str}")
+
         result = subprocess.run(
             command,
             cwd=cwd,
@@ -107,7 +114,7 @@ def prepare_workdir(dir, version):
                 shutil.rmtree(dir)
         else:
             raise Exception(
-                f"The directory {dir} exists but is a benchmark work directory."
+                f"The directory {dir} exists but is not a benchmark work directory."
             )
 
     os.makedirs(dir)
@@ -128,11 +135,26 @@ def create_build_path(directory, name):
     return build_path
 
 
-def download(dir, url, file, untar=False, unzip=False):
+def calculate_checksum(file_path):
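+    # Stream the file in 4 KiB blocks and return its SHA-384 hex digest.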
+    sha_hash = hashlib.sha384()
+    with open(file_path, "rb") as f:
+        for byte_block in iter(lambda: f.read(4096), b""):
+            sha_hash.update(byte_block)
+    return sha_hash.hexdigest()
+
+
+def download(dir, url, file, untar=False, unzip=False, checksum=""):
     data_file = os.path.join(dir, file)
     if not Path(data_file).exists():
         print(f"{data_file} does not exist, downloading")
         urllib.request.urlretrieve(url, data_file)
+        calculated_checksum = calculate_checksum(data_file)
+        if calculated_checksum != checksum:
+            print(
+                f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue."
+            )
+            # Remove the corrupted download so a rerun does not silently reuse it.
+            os.remove(data_file)
+            exit(1)
+
         if untar:
             file = tarfile.open(data_file)
             file.extractall(dir)