diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index d1bb5fb53b83a..4c2973d250e3d 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -1,16 +1,37 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +from dataclasses import dataclass import os import shutil from pathlib import Path -from .result import Result +from utils.result import BenchmarkMetadata, BenchmarkTag, Result from options import options from utils.utils import download, run -import urllib.request -import tarfile + +benchmark_tags = [ + BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"), + BenchmarkTag("UR", "Benchmark uses Unified Runtime API"), + BenchmarkTag("L0", "Benchmark uses Level Zero API directly"), + BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"), + BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"), + BenchmarkTag("application", "Real application-based performance test"), + BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"), + BenchmarkTag("submit", "Tests kernel submission performance"), + BenchmarkTag("math", "Tests math computation performance"), + BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"), + BenchmarkTag("allocation", "Tests memory allocation performance"), + BenchmarkTag("graph", "Tests graph-based execution performance"), + BenchmarkTag("latency", "Measures operation latency"), + BenchmarkTag("throughput", "Measures operation throughput"), + BenchmarkTag("inference", "Tests ML/AI inference performance"), + BenchmarkTag("image", "Image processing benchmark"), + BenchmarkTag("simulation", "Physics or scientific simulation benchmark"), +] + +benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} class Benchmark: @@ -55,19 +76,25 @@ def create_data_path(self, name, skip_data_dir=False): data_path = os.path.join(self.directory, name) else: data_path = os.path.join(self.directory, "data", name) - if options.rebuild and Path(data_path).exists(): + if options.redownload and Path(data_path).exists(): shutil.rmtree(data_path) Path(data_path).mkdir(parents=True, exist_ok=True) return data_path - def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False): + def download( + self, + name, + url, + file, + untar=False, + unzip=False, + skip_data_dir=False, + checksum="", + ): self.data_path = self.create_data_path(name, skip_data_dir) - return download(self.data_path, url, file, untar, unzip) - - def name(self): - raise NotImplementedError() + return download(self.data_path, url, file, untar, unzip, checksum) def lower_is_better(self): return True @@ -87,6 +114,30 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() + def name(self): + raise NotImplementedError() + + def description(self): + return "" + + def notes(self) -> str: + return None + + def unstable(self) -> str: + return None + + def get_tags(self) -> list[str]: + return [] + + def get_metadata(self) -> BenchmarkMetadata: + return BenchmarkMetadata( + type="benchmark", + description=self.description(), + notes=self.notes(), + unstable=self.unstable(), + tags=self.get_tags(), + ) + class Suite: def benchmarks(self) -> list[Benchmark]: @@ -97,3 +148,6 @@ def name(self) -> str: def 
setup(self): return + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return {} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 4658a3414e16a..d83a0d081af57 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -8,10 +8,33 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import BenchmarkMetadata, Result from options import options from enum import Enum + +class RUNTIMES(Enum): + SYCL = "sycl" + LEVEL_ZERO = "l0" + UR = "ur" + + +def runtime_to_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "Level Zero", + RUNTIMES.UR: "Unified Runtime", + }[runtime] + + +def runtime_to_tag_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "L0", + RUNTIMES.UR: "UR", + }[runtime] + + class ComputeBench(Suite): def __init__(self, directory): self.directory = directory @@ -19,6 +42,12 @@ def __init__(self, directory): def name(self) -> str: return "Compute Benchmarks" + def git_url(self) -> str: + return "https://github.com/intel/compute-benchmarks.git" + + def git_hash(self) -> str: + return "b5cc46acf61766ab00da04e85bd4da4f7591eb21" + def setup(self): if options.sycl is None: return @@ -26,8 +55,8 @@ def setup(self): repo_path = git_clone( self.directory, "compute-benchmarks-repo", - "https://github.com/intel/compute-benchmarks.git", - "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", + self.git_url(), + self.git_hash(), ) build_path = create_build_path(self.directory, "compute-benchmarks-build") @@ -47,13 +76,43 @@ def setup(self): f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", ] - print(f"{self.__class__.__name__}: Run {configure_command}") run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") - run(f"cmake --build {build_path} -j", add_sycl=True) + + run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True) self.built = True + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "SubmitKernel": BenchmarkMetadata( + type="group", + description="Measures CPU time overhead of submitting kernels through different APIs.", + notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" + "Work is ongoing to reduce the overhead of the SYCL API\n", + tags=["submit", "micro", "SYCL", "UR", "L0"], + ), + "SinKernelGraph": BenchmarkMetadata( + type="group", + unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", + tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"], + ), + "SubmitGraph": BenchmarkMetadata( + type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] + ), + } + + def enabled_runtimes(self, supported_runtimes=None): + # all runtimes in the RUNTIMES enum + runtimes = supported_runtimes or list(RUNTIMES) + + # Filter out UR if not available + if options.ur is None: + runtimes = [r for r in runtimes if r != RUNTIMES.UR] + + return runtimes + def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] @@ -61,11 +120,46 @@ def benchmarks(self) 
-> list[Benchmark]: if options.ur_adapter == "cuda": return [] - benches = [ - SubmitKernelL0(self, 0), - SubmitKernelL0(self, 1), - SubmitKernelSYCL(self, 0), - SubmitKernelSYCL(self, 1), + benches = [] + + # Add SubmitKernel benchmarks using loops + for runtime in self.enabled_runtimes(): + for in_order_queue in [0, 1]: + for measure_completion in [0, 1]: + benches.append( + SubmitKernel(self, runtime, in_order_queue, measure_completion) + ) + + # Add SinKernelGraph benchmarks + for runtime in self.enabled_runtimes(): + for with_graphs in [0, 1]: + for num_kernels in [5, 100]: + benches.append( + GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) + ) + + # Add ULLS benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]): + benches.append(UllsEmptyKernel(self, runtime, 1000, 256)) + benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1)) + + # Add GraphApiSubmitGraph benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL]): + for in_order_queue in [0, 1]: + for num_kernels in [4, 10, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + runtime, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + + # Add other benchmarks + benches += [ QueueInOrderMemcpy(self, 0, "Device", "Device", 1024), QueueInOrderMemcpy(self, 0, "Host", "Device", 1024), QueueMemcpy(self, "Device", "Device", 1024), @@ -73,29 +167,14 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), VectorSum(self), - MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 100, 1, 1, 1), - MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), ] + # Add UR-specific benchmarks if options.ur is not None: benches += [ - SubmitKernelUR(self, 0, 0), - SubmitKernelUR(self, 1, 0), - SubmitKernelUR(self, 1, 1), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), ] return benches @@ -130,6 +209,9 @@ def setup(self): def explicit_group(self): return "" + def description(self) -> str: + return "" + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", @@ -161,6 +243,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return ret @@ -192,74 +276,52 @@ def teardown(self): return -class SubmitKernelSYCL(ComputeBenchmark): - def __init__(self, bench, ioq): +class SubmitKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_sycl", 
"SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_sycl SubmitKernel {order}" - - def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - "--MeasureCompletion=0", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] + self.runtime = runtime + self.measure_completion = measure_completion + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" + ) - -class SubmitKernelUR(ComputeBenchmark): - def __init__(self, bench, ioq, measureCompletion): - self.ioq = ioq - self.measureCompletion = measureCompletion - super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel") + def get_tags(self): + return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"] def name(self): order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_ur SubmitKernel {order}" + ( - " with measure completion" if self.measureCompletion else "" - ) + completion_str = " with measure completion" if self.measure_completion else "" + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}" def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - f"--MeasureCompletion={self.measureCompletion}", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] + return ( + "SubmitKernel" + if self.measure_completion == 0 + else "SubmitKernel With Completion" + ) + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + runtime_name = runtime_to_name(self.runtime) -class SubmitKernelL0(ComputeBenchmark): - def __init__(self, bench, ioq): - self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel") + completion_desc = "" + if self.runtime == RUNTIMES.UR: + completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time" - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_l0 SubmitKernel {order}" + l0_specific = "" + if self.runtime == RUNTIMES.LEVEL_ZERO: + l0_specific = " Uses immediate command lists" - def explicit_group(self): - return "SubmitKernel" + return ( + f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " + f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}" + ) def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", "--DiscardEvents=0", - "--MeasureCompletion=0", + f"--MeasureCompletion={self.measure_completion}", "--iterations=100000", "--Profiling=0", "--NumKernels=10", @@ -280,6 +342,17 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL {order} queue overhead for {operation} from {self.source} to " + f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads." 
+ ) + + def get_tags(self): + return ["memory", "submit", "latency", "SYCL", "micro"] + def bin_args(self) -> list[str]: return [ "--iterations=100000", @@ -303,6 +376,16 @@ def __init__(self, bench, isCopyOnly, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL in-order queue memory copy performance for {operation} from " + f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration." + ) + + def get_tags(self): + return ["memory", "latency", "SYCL", "micro"] + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -324,6 +407,15 @@ def __init__(self, bench, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + return ( + f"Measures general SYCL queue memory copy performance from {self.source} to " + f"{self.destination} with {self.size} bytes per operation." + ) + + def get_tags(self): + return ["memory", "latency", "SYCL", "micro"] + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -343,10 +435,19 @@ def __init__(self, bench, type, size, placement): def name(self): return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + def description(self) -> str: + return ( + f"Measures {self.placement} memory bandwidth using {self.type} pattern with " + f"{self.size} bytes. Higher values (GB/s) indicate better performance." + ) + # measurement is in GB/s def lower_is_better(self): return False + def get_tags(self): + return ["memory", "throughput", "SYCL", "micro"] + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -356,6 +457,7 @@ def bin_args(self) -> list[str]: "--useEvents=0", "--contents=Zeros", "--multiplier=1", + "--vectorSize=1", ] @@ -366,6 +468,15 @@ def __init__(self, bench): def name(self): return f"miscellaneous_benchmark_sycl VectorSum" + def description(self) -> str: + return ( + "Measures performance of vector addition across 3D grid (512x256x256 elements) " + "using SYCL." + ) + + def get_tags(self): + return ["math", "throughput", "SYCL", "micro"] + def bin_args(self) -> list[str]: return [ "--iterations=1000", @@ -402,6 +513,19 @@ def name(self): + (" without events" if not self.useEvents else "") ) + def description(self) -> str: + src_type = "device" if self.srcUSM == 1 else "host" + dst_type = "device" if self.dstUSM == 1 else "host" + events = "with" if self.useEvents else "without" + return ( + f"Measures multithreaded memory copy performance with {self.numThreads} threads " + f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes " + f"from {src_type} to {dst_type} memory {events} events." 
+ ) + + def get_tags(self): + return ["memory", "latency", "UR", "micro"] + def bin_args(self) -> list[str]: return [ "--Ioq=1", @@ -417,12 +541,6 @@ def bin_args(self) -> list[str]: ] -class RUNTIMES(Enum): - SYCL = "sycl" - LEVEL_ZERO = "l0" - UR = "ur" - - class GraphApiSinKernelGraph(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): self.withGraphs = withGraphs @@ -435,9 +553,29 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): def explicit_group(self): return f"SinKernelGraph {self.numKernels}" + def description(self) -> str: + execution = "using graphs" if self.withGraphs else "without graphs" + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"sin kernels {execution}. Tests overhead and benefits of graph-based execution." + ) + def name(self): return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" + def unstable(self) -> str: + return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." + + def get_tags(self): + return [ + "graph", + runtime_to_tag_name(self.runtime), + "proxy", + "submit", + "memory", + "latency", + ] + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -448,26 +586,115 @@ def bin_args(self) -> list[str]: ] -class GraphApiSubmitExecGraph(ComputeBenchmark): - def __init__(self, bench, ioq, submit, numKernels): - self.ioq = ioq - self.submit = submit +class GraphApiSubmitGraph(ComputeBenchmark): + def __init__( + self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime + ): + self.inOrderQueue = inOrderQueue self.numKernels = numKernels - super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph") + self.runtime = runtime + self.measureCompletionTime = measureCompletionTime + super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph") + + def explicit_group(self): + return f"SubmitGraph {self.numKernels}" + + def description(self) -> str: + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution." 
+        )
+
+    def name(self):
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+
+    def get_tags(self):
+        return [
+            "graph",
+            runtime_to_tag_name(self.runtime),
+            "micro",
+            "submit",
+            "latency",
+        ]
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--NumKernels={self.numKernels}",
+            f"--MeasureCompletionTime={self.measureCompletionTime}",
+            f"--InOrderQueue={self.inOrderQueue}",
+            "--Profiling=0",
+            "--KernelExecutionTime=1",
+        ]
+
+
+class UllsEmptyKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
+        self.wgc = wgc
+        self.wgs = wgs
+        self.runtime = runtime
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel")
+
+    def explicit_group(self):
+        return f"EmptyKernel {self.wgc} {self.wgs}"
+
+    def description(self) -> str:
+        return ""
 
     def name(self):
-        return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}"
+        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--wgs={self.wgs}",
+            f"--wgc={self.wgc}",
+        ]
+
+
+class UllsKernelSwitch(ComputeBenchmark):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        count,
+        kernelTime,
+        barrier,
+        hostVisible,
+        ioq,
+        ctrBasedEvents,
+    ):
+        self.count = count
+        self.kernelTime = kernelTime
+        self.barrier = barrier
+        self.hostVisible = hostVisible
+        self.ctrBasedEvents = ctrBasedEvents
+        self.runtime = runtime
+        self.ioq = ioq
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch")
 
     def explicit_group(self):
-        if self.submit:
-            return "SubmitGraph"
-        else:
-            return "ExecGraph"
+        return f"KernelSwitch {self.count} {self.kernelTime}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
+
+    def get_tags(self):
+        return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
 
     def bin_args(self) -> list[str]:
         return [
-            "--iterations=100",
-            f"--measureSubmit={self.submit}",
+            "--iterations=1000",
+            f"--count={self.count}",
+            f"--kernelTime={self.kernelTime}",
+            f"--barrier={self.barrier}",
+            f"--hostVisible={self.hostVisible}",
             f"--ioq={self.ioq}",
-            f"--numKernels={self.numKernels}",
+            f"--ctrBasedEvents={self.ctrBasedEvents}",
         ]
diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
index 6524c95a9f56f..86d41ed525292 100644
--- a/devops/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,10 +8,10 @@ from pathlib import Path from utils.utils import download, git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os @@ -25,6 +25,12 @@ def __init__(self, directory): def name(self) -> str: return "llama.cpp bench" + def git_url(self) -> str: + return "https://github.com/ggerganov/llama.cpp" + + def git_hash(self) -> str: + return "1ee9eea094fe5846c7d8d770aa7caa749d246b23" + def setup(self): if options.sycl is None: return @@ -32,8 +38,8 @@ def setup(self): repo_path = git_clone( self.directory, "llamacpp-repo", - "https://github.com/ggerganov/llama.cpp", - "1ee9eea094fe5846c7d8d770aa7caa749d246b23", + self.git_url(), + self.git_hash(), ) self.models_dir = os.path.join(self.directory, "models") @@ -43,6 +49,7 @@ def setup(self): self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf", + checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4", ) self.oneapi = get_oneapi() @@ -62,11 +69,11 @@ def setup(self): f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}", ] - print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run( - f"cmake --build {self.build_path} -j", + f"cmake --build {self.build_path} -j {options.build_jobs}", add_sycl=True, ld_library=self.oneapi.ld_libraries(), ) @@ -92,6 +99,17 @@ def setup(self): def name(self): return f"llama.cpp" + def description(self) -> str: + return ( + "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. " + "Runs both prompt processing (initial context processing) and text generation benchmarks with " + "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct " + "quantized model and leverages SYCL with oneDNN for acceleration." + ) + + def get_tags(self): + return ["SYCL", "application", "inference", "throughput"] + def lower_is_better(self): return False @@ -130,6 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/benches/result.py deleted file mode 100644 index 52a098d91c24a..0000000000000 --- a/devops/scripts/benchmarks/benches/result.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from dataclasses import dataclass -from typing import Optional -from dataclasses_json import dataclass_json -from datetime import datetime - - -@dataclass_json -@dataclass -class Result: - label: str - value: float - command: str - env: str - stdout: str - passed: bool = True - unit: str = "" - explicit_group: str = "" - # stddev can be optionally set by the benchmark, - # if not set, it will be calculated automatically. 
- stddev: float = 0.0 - # values below should not be set by the benchmark - name: str = "" - lower_is_better: bool = True - git_hash: str = "" - date: Optional[datetime] = None - suite: str = "Unknown" - - -@dataclass_json -@dataclass -class BenchmarkRun: - results: list[Result] - name: str = "This PR" - git_hash: str = "" - date: datetime = None diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index f7cf571a7ecd7..9854c92d338fc 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options @@ -23,6 +23,12 @@ def __init__(self, directory): def name(self) -> str: return "SYCL-Bench" + def git_url(self) -> str: + return "https://github.com/unisa-hpc/sycl-bench.git" + + def git_hash(self) -> str: + return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b" + def setup(self): if options.sycl is None: return @@ -31,8 +37,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/mateuszpn/sycl-bench.git", - "1e6ab2cfd004a72c5336c26945965017e06eab71", + self.git_url(), + self.git_hash(), ) configure_command = [ @@ -51,7 +57,7 @@ def setup(self): ] run(configure_command, add_sycl=True) - run(f"cmake --build {build_path} -j", add_sycl=True) + run(f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True) self.built = True @@ -65,14 +71,14 @@ def benchmarks(self) -> list[Benchmark]: DagTaskS(self), HostDevBandwidth(self), LocalMem(self), - Pattern_L2(self), - Reduction(self), + # Pattern_L2(self), # validation failure + # Reduction(self), # validation failure ScalarProd(self), SegmentReduction(self), - UsmAccLatency(self), + # UsmAccLatency(self), # validation failure UsmAllocLatency(self), - UsmInstrMix(self), - UsmPinnedOverhead(self), + # UsmInstrMix(self), # validation failure + # UsmPinnedOverhead(self), # validation failure VecAdd(self), # *** sycl-bench single benchmarks # TwoDConvolution(self), # run time < 1ms @@ -82,20 +88,20 @@ def benchmarks(self) -> list[Benchmark]: Atax(self), # Atomic_reduction(self), # run time < 1ms Bicg(self), - Correlation(self), - Covariance(self), - Gemm(self), - Gesumv(self), - Gramschmidt(self), + # Correlation(self), # validation failure + # Covariance(self), # validation failure + # Gemm(self), # validation failure + # Gesumv(self), # validation failure + # Gramschmidt(self), # validation failure KMeans(self), LinRegCoeff(self), # LinRegError(self), # run time < 1ms - MatmulChain(self), + # MatmulChain(self), # validation failure MolDyn(self), - Mvt(self), + # Mvt(self), # validation failure Sf(self), - Syr2k(self), - Syrk(self), + # Syr2k(self), # validation failure + # Syrk(self), # validation failure ] @@ -105,7 +111,6 @@ def __init__(self, bench, name, test): self.bench = bench self.bench_name = name self.test = test - self.done = False def bin_args(self) -> list[str]: return [] @@ -113,16 +118,26 @@ def bin_args(self) -> list[str]: def extra_env_vars(self) -> dict: return {} + def get_tags(self): + base_tags = ["SYCL", "micro"] + if 
"Memory" in self.bench_name or "mem" in self.bench_name.lower(): + base_tags.append("memory") + if "Reduction" in self.bench_name: + base_tags.append("math") + if "Bandwidth" in self.bench_name: + base_tags.append("throughput") + if "Latency" in self.bench_name: + base_tags.append("latency") + return base_tags + def setup(self): self.benchmark_bin = os.path.join( self.directory, "sycl-bench-build", self.bench_name ) def run(self, env_vars) -> list[Result]: - if self.done: - return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") - print(f"{self.__class__.__name__}: Results in {self.outputfile}") + command = [ f"{self.benchmark_bin}", f"--warmup-run", @@ -143,25 +158,27 @@ def run(self, env_vars) -> list[Result]: if not row[0].startswith("#"): res_list.append( Result( - label=row[0], + label=f"{self.name()} {row[0]}", value=float(row[12]) * 1000, # convert to ms passed=(row[1] == "PASS"), command=command, env=env_vars, stdout=row, unit="ms", + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) - self.done = True - return res_list - def teardown(self): - print(f"Removing {self.outputfile}...") os.remove(self.outputfile) - return + + return res_list def name(self): - return self.test + return f"{self.bench.name()} {self.test}" + + def teardown(self): + return # multi benchmarks diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 06eac12b25344..ad1e8c9e57735 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,7 +6,7 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import BenchmarkMetadata, Result from utils.utils import run, create_build_path from options import options import os @@ -19,35 +19,56 @@ def __init__(self): def setup(self): return + def name(self) -> str: + return "Test Suite" + def benchmarks(self) -> list[Benchmark]: bench_configs = [ - ("Memory Bandwidth", 2000, 200, "Foo Group"), - ("Latency", 100, 20, "Bar Group"), - ("Throughput", 1500, 150, "Foo Group"), - ("FLOPS", 3000, 300, "Foo Group"), - ("Cache Miss Rate", 250, 25, "Bar Group"), + ("Memory Bandwidth", 2000, 200, "Foo Group", None, None), + ("Latency", 100, 20, "Bar Group", "A Latency test note!", None), + ("Throughput", 1500, 150, "Foo Group", None, None), + ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"), + ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"), ] result = [] - for base_name, base_value, base_diff, group in bench_configs: + for base_name, base_value, base_diff, group, notes, unstable in bench_configs: for variant in range(6): value_multiplier = 1.0 + (variant * 0.2) name = f"{base_name} {variant+1}" value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(name, value, diff, group)) + result.append( + TestBench(self, name, value, diff, group, notes, unstable) + ) return result + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "Foo Group": BenchmarkMetadata( + type="group", + description="This is a test benchmark for Foo Group.", + notes="This is a test note for Foo Group.\n" "Look, multiple lines!", + ), 
+ "Bar Group": BenchmarkMetadata( + type="group", + description="This is a test benchmark for Bar Group.", + unstable="This is an unstable note for Bar Group.", + ), + } + class TestBench(Benchmark): - def __init__(self, name, value, diff, group=""): + def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None): + super().__init__("", suite) self.bname = name self.value = value self.diff = diff self.group = group - super().__init__("") + self.notes_text = notes + self.unstable_text = unstable def name(self): return self.bname @@ -58,6 +79,15 @@ def lower_is_better(self): def setup(self): return + def description(self) -> str: + return f"This is a test benchmark for {self.bname}." + + def notes(self) -> str: + return self.notes_text + + def unstable(self) -> str: + return self.unstable_text + def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ @@ -65,7 +95,7 @@ def run(self, env_vars) -> list[Result]: label=self.name(), explicit_group=self.group, value=random_value, - command="", + command=["test", "--arg1", "foo"], env={"A": "B"}, stdout="no output", unit="ms", diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index c7b767f02bbe1..f0b92777dd2f8 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,10 +6,10 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os import csv import io @@ -22,8 +22,6 @@ def isUMFAvailable(): class UMFSuite(Suite): def __init__(self, directory): self.directory = directory - if not isUMFAvailable(): - print("UMF not provided. Related benchmarks will not run") def name(self) -> str: return "UMF" @@ -76,6 +74,9 @@ def setup(self): self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) + def get_tags(self): + return ["UMF", "allocation", "latency", "micro"] + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index b7d06cbe4a3a2..493298dea8b10 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,10 +7,10 @@ import shutil from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import shutil import os @@ -26,6 +26,12 @@ def __init__(self, directory): def name(self) -> str: return "Velocity Bench" + def git_url(self) -> str: + return "https://github.com/oneapi-src/Velocity-Bench/" + + def git_hash(self) -> str: + return "b22215c16f789100449c34bf4eaa3fb178983d69" + def setup(self): if options.sycl is None: return @@ -33,8 +39,8 @@ def setup(self): self.repo_path = git_clone( self.directory, "velocity-bench-repo", - "https://github.com/oneapi-src/Velocity-Bench/", - "b22215c16f789100449c34bf4eaa3fb178983d69", + self.git_url(), + self.git_hash(), ) def benchmarks(self) -> list[Benchmark]: @@ -101,7 +107,7 @@ def setup(self): run(configure_command, {"CC": "clang", "CXX": "clang++"}, add_sycl=True) run( - f"cmake --build {build_path} -j", + f"cmake --build {build_path} -j {options.build_jobs}", add_sycl=True, ld_library=self.ld_libraries(), ) @@ -115,6 +121,12 @@ def extra_env_vars(self) -> dict: def parse_output(self, stdout: str) -> float: raise NotImplementedError() + def description(self) -> str: + return "" + + def get_tags(self): + return ["SYCL", "application"] + def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -133,6 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, + git_url=self.vb.git_url(), + git_hash=self.vb.git_hash(), ) ] @@ -147,6 +161,12 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Hashtable" + def description(self) -> str: + return ( + "Measures hash table search performance using an efficient lock-free algorithm with linear probing. " + "Reports throughput in millions of keys processed per second. Higher values indicate better performance." + ) + def bin_args(self) -> list[str]: return ["--no-verify"] @@ -162,6 +182,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse keys per second from benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "throughput"] + class Bitcracker(VelocityBase): def __init__(self, vb: VelocityBench): @@ -170,6 +193,13 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Bitcracker" + def description(self) -> str: + return ( + "Password-cracking application for BitLocker-encrypted memory units. " + "Uses dictionary attack to find user or recovery passwords. " + "Measures total time required to process 60000 passwords." + ) + def bin_args(self) -> list[str]: self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass") @@ -193,6 +223,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." 
) + def get_tags(self): + return ["SYCL", "application", "throughput"] + class SobelFilter(VelocityBase): def __init__(self, vb: VelocityBench): @@ -204,11 +237,19 @@ def download_deps(self): "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True, + checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66", ) def name(self): return "Velocity-Bench Sobel Filter" + def description(self) -> str: + return ( + "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter " + "to reduce edge artifacts. Processes a large 32K x 32K image and measures " + "the time required to apply the filter." + ) + def bin_args(self) -> list[str]: return [ "-i", @@ -231,6 +272,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "image", "throughput"] + class QuickSilver(VelocityBase): def __init__(self, vb: VelocityBench): @@ -249,6 +293,13 @@ def run(self, env_vars) -> list[Result]: def name(self): return "Velocity-Bench QuickSilver" + def description(self) -> str: + return ( + "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. " + "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. " + "Reports a figure of merit in MMS/CTT where higher values indicate better performance." + ) + def lower_is_better(self): return False @@ -271,6 +322,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ["SYCL", "application", "simulation", "throughput"] + class Easywave(VelocityBase): def __init__(self, vb: VelocityBench): @@ -279,14 +333,22 @@ def __init__(self, vb: VelocityBench): def download_deps(self): self.download( "easywave", - "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", + "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True, + checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1", ) def name(self): return "Velocity-Bench Easywave" + def description(self) -> str: + return ( + "A tsunami wave simulator used for researching tsunami generation and wave propagation. " + "Measures the elapsed time in milliseconds to simulate a specified tsunami event " + "based on real-world data." + ) + def bin_args(self) -> list[str]: return [ "-grid", @@ -327,6 +389,9 @@ def parse_output(self, stdout: str) -> float: os.path.join(options.benchmark_cwd, "easywave.log") ) + def get_tags(self): + return ["SYCL", "application", "simulation"] + class CudaSift(VelocityBase): def __init__(self, vb: VelocityBench): @@ -341,6 +406,13 @@ def download_deps(self): def name(self): return "Velocity-Bench CudaSift" + def description(self) -> str: + return ( + "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm " + "for detecting, describing, and matching local features in images. " + "Measures average processing time in milliseconds." 
+ ) + def parse_output(self, stdout: str) -> float: match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout) if match: @@ -348,6 +420,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "image"] + class DLCifar(VelocityBase): def __init__(self, vb: VelocityBench): @@ -364,6 +439,7 @@ def download_deps(self): "cifar-10-binary.tar.gz", untar=True, skip_data_dir=True, + checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814", ) return @@ -382,6 +458,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-cifar" + def description(self) -> str: + return ( + "Deep learning image classification workload based on the CIFAR-10 dataset " + "of 60,000 32x32 color images in 10 classes. Uses neural networks to " + "classify input images and measures total calculation time." + ) + def parse_output(self, stdout: str) -> float: match = re.search( r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout @@ -391,6 +474,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "inference", "image"] + class DLMnist(VelocityBase): def __init__(self, vb: VelocityBench): @@ -407,6 +493,7 @@ def download_deps(self): "train-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee", ) self.download( "datasets", @@ -414,6 +501,7 @@ def download_deps(self): "train-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00", ) self.download( "datasets", @@ -421,6 +509,7 @@ def download_deps(self): "t10k-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c", ) self.download( "datasets", @@ -428,6 +517,7 @@ def download_deps(self): "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d", ) def extra_cmake_args(self): @@ -445,6 +535,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-mnist" + def description(self) -> str: + return ( + "Digit recognition based on the MNIST database, one of the oldest and most popular " + "databases of handwritten digits. Uses neural networks to identify digits " + "and measures total calculation time." + ) + def bin_args(self): return ["-conv_algo", "ONEDNN_AUTO"] @@ -465,6 +562,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ["SYCL", "application", "inference", "image"] + class SVM(VelocityBase): def __init__(self, vb: VelocityBench): @@ -488,6 +588,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench svm" + def description(self) -> str: + return ( + "Implementation of Support Vector Machine, a popular classical machine learning technique. " + "Uses supervised learning models with associated algorithms to analyze data " + "for classification and regression analysis. Measures total elapsed time." 
+ ) + def bin_args(self): return [ f"{self.code_path}/a9a", @@ -500,3 +607,6 @@ def parse_output(self, stdout: str) -> float: return float(match.group(1)) else: raise ValueError("Failed to parse benchmark output.") + + def get_tags(self): + return ["SYCL", "application", "inference"] diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 7902aa4f04c35..0b80c54ad7393 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,14 +6,14 @@ import os import json from pathlib import Path -from benches.result import Result, BenchmarkRun +import socket +from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone from utils.utils import run class BenchmarkHistory: - benchmark_run_index_max = 0 runs = [] def __init__(self, dir): @@ -35,42 +35,55 @@ def load(self, n: int): # Get all JSON files in the results directory benchmark_files = list(results_dir.glob("*.json")) - # Extract index numbers and sort files by index number - def extract_index(file_path: Path) -> int: + # Extract timestamp and sort files by it + def extract_timestamp(file_path: Path) -> str: try: - return int(file_path.stem.split("_")[0]) - except (IndexError, ValueError): - return -1 + return file_path.stem.split("_")[-1] + except IndexError: + return "" - benchmark_files = [ - file for file in benchmark_files if extract_index(file) != -1 - ] - benchmark_files.sort(key=extract_index) + benchmark_files.sort(key=extract_timestamp, reverse=True) # Load the first n benchmark files benchmark_runs = [] - for file_path in benchmark_files[n::-1]: + for file_path in benchmark_files[:n]: benchmark_run = self.load_result(file_path) if benchmark_run: benchmark_runs.append(benchmark_run) - if benchmark_files: - self.benchmark_run_index_max = extract_index(benchmark_files[-1]) - self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: - result = run("git rev-parse --short HEAD") + script_dir = os.path.dirname(os.path.abspath(__file__)) + result = run("git rev-parse --short HEAD", cwd=script_dir) git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin", cwd=script_dir) + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + except: git_hash = "unknown" + github_repo = None return BenchmarkRun( name=name, git_hash=git_hash, + github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, + hostname=socket.gethostname(), ) def save(self, save_name, results: list[Result], to_file=True): @@ -84,12 +97,9 @@ def save(self, save_name, results: list[Result], to_file=True): results_dir = Path(os.path.join(self.dir, "results")) os.makedirs(results_dir, exist_ok=True) - self.benchmark_run_index_max += 
1 - file_path = Path( - os.path.join( - results_dir, f"{self.benchmark_run_index_max}_{save_name}.json" - ) - ) + # Use formatted timestamp for the filename + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: json.dump(serialized, file, indent=4) print(f"Benchmark results saved to {file_path}") @@ -120,6 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? + hostname=first_run.hostname, ) return average_benchmark_run diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 4ad90b39b9001..859aa96e50903 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,23 +27,27 @@ def run_iterations( - benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]] + benchmark: Benchmark, + env_vars, + iters: int, + results: dict[str, list[Result]], + failures: dict[str, str], ): for iter in range(iters): - print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True) + print(f"running {benchmark.name()}, iteration {iter}... ", flush=True) bench_results = benchmark.run(env_vars) if bench_results is None: - print(f"did not finish (OK for sycl-bench).") + failures[benchmark.name()] = "benchmark produced no results!" break for bench_result in bench_results: - # TODO: report failures in markdown/html ? if not bench_result.passed: - print(f"complete ({bench_result.label}: verification FAILED)") + failures[bench_result.label] = "verification failed" + print(f"complete ({bench_result.label}: verification failed).") continue print( - f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." + f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." 
            )
        bench_result.name = bench_result.label

@@ -132,6 +136,18 @@ def process_results(
     return valid_results, processed
 
 
+def collect_metadata(suites):
+    metadata = {}
+
+    for s in suites:
+        metadata.update(s.additionalMetadata())
+        suite_benchmarks = s.benchmarks()
+        for benchmark in suite_benchmarks:
+            metadata[benchmark.name()] = benchmark.get_metadata()
+
+    return metadata
+
+
 def main(directory, additional_env_vars, save_name, compare_names, filter):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
 
@@ -142,20 +158,21 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         options.extra_ld_libraries.extend(cr.ld_libraries())
         options.extra_env_vars.update(cr.env_vars())
 
-    suites = (
-        [
-            ComputeBench(directory),
-            VelocityBench(directory),
-            SyclBench(directory),
-            LlamaCppBench(directory),
-            UMFSuite(directory),
-            # TestSuite()
-        ]
-        if not options.dry_run
-        else []
-    )
+    suites = [
+        ComputeBench(directory),
+        VelocityBench(directory),
+        SyclBench(directory),
+        LlamaCppBench(directory),
+        UMFSuite(directory),
+        TestSuite(),
+    ]
+
+    # If dry run, we're done
+    if options.dry_run:
+        suites = []
 
     benchmarks = []
+    failures = {}
 
     for s in suites:
         suite_benchmarks = s.benchmarks()
@@ -170,25 +187,26 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             print(f"Setting up {type(s).__name__}")
             try:
                 s.setup()
-            except:
+            except Exception as e:
+                failures[s.name()] = f"Suite setup failure: {e}"
                 print(f"{type(s).__name__} setup failed. Benchmarks won't be added.")
             else:
                 print(f"{type(s).__name__} setup complete.")
                 benchmarks += suite_benchmarks
 
-    for b in benchmarks:
-        print(b.name())
-
     for benchmark in benchmarks:
         try:
-            print(f"Setting up {benchmark.name()}... ")
+            if options.verbose:
+                print(f"Setting up {benchmark.name()}... ")
             benchmark.setup()
-            print(f"{benchmark.name()} setup complete.")
+            if options.verbose:
+                print(f"{benchmark.name()} setup complete.")
         except Exception as e:
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark setup failure: {e}"
                 print(f"failed: {e}")
 
     results = []
@@ -199,7 +217,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             processed: list[Result] = []
             for _ in range(options.iterations_stddev):
                 run_iterations(
-                    benchmark, merged_env_vars, options.iterations, intermediate_results
+                    benchmark,
+                    merged_env_vars,
+                    options.iterations,
+                    intermediate_results,
+                    failures,
                 )
                 valid, processed = process_results(
                     intermediate_results, benchmark.stddev_threshold()
@@ -211,12 +233,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark run failure: {e}"
                 print(f"failed: {e}")
 
     for benchmark in benchmarks:
-        print(f"tearing down {benchmark.name()}... ", end="", flush=True)
+        # this never has any useful information anyway, so hide it behind verbose
+        if options.verbose:
+            print(f"tearing down {benchmark.name()}... ", flush=True)
         benchmark.teardown()
-        print("complete.")
+        if options.verbose:
+            print(f"{benchmark.name()} teardown complete.")
 
     this_name = options.current_run_name
     chart_data = {}
@@ -224,7 +250,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    history = BenchmarkHistory(directory)
+    results_dir = directory
+    if options.custom_results_dir:
+        results_dir = Path(options.custom_results_dir)
+    history = BenchmarkHistory(results_dir)
     # limit how many files we load.
# should this be configurable? history.load(1000) @@ -297,7 +326,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--adapter", type=str, - help="Options to build the Unified Runtime as part of the benchmark", + help="Unified Runtime adapter to use.", default="level_zero", ) parser.add_argument( @@ -305,6 +334,11 @@ def validate_and_parse_env_args(env_args): help="Do not rebuild the benchmarks from scratch.", action="store_true", ) + parser.add_argument( + "--redownload", + help="Always download benchmark data dependencies, even if they already exist.", + action="store_true", + ) parser.add_argument( "--env", type=str, @@ -423,6 +457,18 @@ def validate_and_parse_env_args(env_args): help="Directory for cublas library", default=None, ) + parser.add_argument( + "--results-dir", + type=str, + help="Specify a custom results directory", + default=options.custom_results_dir, + ) + parser.add_argument( + "--build-jobs", + type=int, + help="Number of build jobs to run simultaneously", + default=options.build_jobs, + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -430,6 +476,7 @@ def validate_and_parse_env_args(env_args): options.workdir = args.benchmark_directory options.verbose = args.verbose options.rebuild = not args.no_rebuild + options.redownload = args.redownload options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout @@ -448,6 +495,8 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory + options.custom_results_dir = args.results_dir + options.build_jobs = args.build_jobs if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 2e92675264544..78eda7ae3c88e 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from enum import Enum +import multiprocessing class Compare(Enum): @@ -21,6 +22,7 @@ class Options: ur_adapter: str = None umf: str = None rebuild: bool = True + redownload: bool = False benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 3 @@ -40,6 +42,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.12" build_igc: bool = False current_run_name: str = "This PR" - + custom_results_dir = None + build_jobs: int = multiprocessing.cpu_count() options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 4ba395bc3aac6..e9c1f135b70cd 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -11,7 +11,7 @@ from collections import defaultdict from dataclasses import dataclass import matplotlib.dates as mdates -from benches.result import BenchmarkRun, Result +from utils.result import BenchmarkRun, Result import numpy as np from string import Template diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py index dd6711cec6365..84af97fc51adb 100644 --- a/devops/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import collections -from benches.result import Result +from utils.result import Result from options import options, 
 
 import ast
diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
index 74d8ff4eb5345..e617168f37a76 100644
--- a/devops/scripts/benchmarks/utils/compute_runtime.py
+++ b/devops/scripts/benchmarks/utils/compute_runtime.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -62,7 +62,7 @@ def build_gmmlib(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.gmmlib_build} -j")
+        run(f"cmake --build {self.gmmlib_build} -j {options.build_jobs}")
         run(f"cmake --install {self.gmmlib_build}")
         return self.gmmlib_install
 
@@ -87,7 +87,7 @@ def build_level_zero(self, repo, commit):
             f"-DCMAKE_BUILD_TYPE=Release",
         ]
         run(configure_command)
-        run(f"cmake --build {self.level_zero_build} -j")
+        run(f"cmake --build {self.level_zero_build} -j {options.build_jobs}")
         run(f"cmake --install {self.level_zero_build}")
         return self.level_zero_install
 
@@ -142,8 +142,11 @@ def build_igc(self, repo, commit):
         ]
         run(configure_command)
 
-        # set timeout to 30min. IGC takes A LONG time to build if building from scratch.
-        run(f"cmake --build {self.igc_build} -j", timeout=600 * 3)
+        # set timeout to 2h. IGC takes A LONG time to build if building from scratch.
+        run(
+            f"cmake --build {self.igc_build} -j {options.build_jobs}",
+            timeout=60 * 60 * 2,
+        )
         # cmake --install doesn't work...
         run("make install", cwd=self.igc_build)
         return self.igc_install
@@ -214,7 +217,7 @@ def build_compute_runtime(self):
             configure_command.append(f"-DIGC_DIR={self.igc}")
 
         run(configure_command)
-        run(f"cmake --build {self.compute_runtime_build} -j")
+        run(f"cmake --build {self.compute_runtime_build} -j {options.build_jobs}")
 
         return self.compute_runtime_build
 
diff --git a/devops/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
similarity index 78%
rename from devops/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/utils/oneapi.py
index 0547f6646e39e..fc27b9a8b2d3e 100644
--- a/devops/scripts/benchmarks/benches/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -7,29 +7,33 @@
 from utils.utils import download, run
 from options import options
 import os
+import hashlib
 
 
 class OneAPI:
-    # random unique number for benchmark oneAPI installation
-    ONEAPI_BENCHMARK_INSTANCE_ID = 987654
-
     def __init__(self):
         self.oneapi_dir = os.path.join(options.workdir, "oneapi")
         Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
-        # delete if some option is set?
+        self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
 
         # can we just hardcode these links?
         self.install_package(
             "dnnl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
         )
         self.install_package(
             "mkl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
         )
         return
 
-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
+        hash_object = hashlib.md5(path.encode())
+        return hash_object.hexdigest()
+
+    def install_package(self, name, url, checksum):
         package_path = os.path.join(self.oneapi_dir, name)
         if Path(package_path).exists():
             print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
             )
             return
 
-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
         try:
             print(f"installing {name}")
             run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
             )
         except:
             print("oneAPI installation likely exists already")
diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py
new file mode 100644
index 0000000000000..14a2ffa905f34
--- /dev/null
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2024-2025 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from dataclasses import dataclass, field
+from dataclasses_json import config, dataclass_json
+from datetime import datetime
+
+
+@dataclass_json
+@dataclass
+class Result:
+    label: str
+    value: float
+    command: list[str]
+    env: dict[str, str]
+    stdout: str
+    passed: bool = True
+    unit: str = ""
+    explicit_group: str = ""
+    # stddev can be optionally set by the benchmark,
+    # if not set, it will be calculated automatically.
+    stddev: float = 0.0
+    git_url: str = ""
+    git_hash: str = ""
+    # values below should not be set by the benchmark
+    name: str = ""
+    lower_is_better: bool = True
+    suite: str = "Unknown"
+
+@dataclass_json
+@dataclass
+class BenchmarkRun:
+    results: list[Result]
+    name: str = "This PR"
+    hostname: str = "Unknown"
+    git_hash: str = ""
+    github_repo: str = None
+    date: datetime = field(
+        default=None,
+        metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
+    )
+
+
+@dataclass_json
+@dataclass
+class BenchmarkTag:
+    name: str
+    description: str = ""
+
+
+@dataclass_json
+@dataclass
+class BenchmarkMetadata:
+    type: str = "benchmark"  # or 'group'
+    description: str = None
+    notes: str = None
+    unstable: str = None
+    tags: list[str] = field(default_factory=list)
+
+
+@dataclass_json
+@dataclass
+class BenchmarkOutput:
+    runs: list[BenchmarkRun]
+    metadata: dict[str, BenchmarkMetadata]
+    tags: dict[str, BenchmarkTag]
+    default_compare_names: list[str] = field(default_factory=list)
diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
index 3a516e8d724f7..54f2ef7fb9c1f 100644
--- a/devops/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -12,6 +12,7 @@
 import urllib  # nosec B404
 from options import options
 from pathlib import Path
+import hashlib
 
 
 def run(
@@ -45,6 +46,12 @@ def run(
 
     env.update(env_vars)
 
+    if options.verbose:
+        command_str = " ".join(command)
+        env_str = " ".join(f"{key}={value}" for key, value in env_vars.items())
+        full_command_str = f"{env_str} {command_str}".strip()
+        print(f"Running: {full_command_str}")
+
     result = subprocess.run(
         command,
         cwd=cwd,
@@ -107,7 +114,7 @@ def prepare_workdir(dir, version):
             shutil.rmtree(dir)
         else:
             raise Exception(
-                f"The directory {dir} exists but is a benchmark work directory."
+                f"The directory {dir} exists but is not a benchmark work directory."
             )
 
     os.makedirs(dir)
@@ -128,11 +135,26 @@ def create_build_path(directory, name):
     return build_path
 
 
-def download(dir, url, file, untar=False, unzip=False):
+def calculate_checksum(file_path):
+    sha_hash = hashlib.sha384()
+    with open(file_path, "rb") as f:
+        for byte_block in iter(lambda: f.read(4096), b""):
+            sha_hash.update(byte_block)
+    return sha_hash.hexdigest()
+
+
+def download(dir, url, file, untar=False, unzip=False, checksum=""):
     data_file = os.path.join(dir, file)
     if not Path(data_file).exists():
         print(f"{data_file} does not exist, downloading")
        urllib.request.urlretrieve(url, data_file)
+        calculated_checksum = calculate_checksum(data_file)
+        if calculated_checksum != checksum:
+            print(
+                f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue."
+            )
+            exit(1)
+
     if untar:
         file = tarfile.open(data_file)
         file.extractall(dir)
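
For reference, the checksum strings passed to download() and install_package() above are SHA-384 hex digests, matching what calculate_checksum() in utils/utils.py produces. A new digest for a data file can be generated with a minimal sketch like the one below (the helper name and file path are only illustrations, not part of the patch):

    import hashlib

    def sha384_hex(path: str) -> str:
        # Stream the file in 4 KiB blocks, mirroring calculate_checksum().
        digest = hashlib.sha384()
        with open(path, "rb") as f:
            for block in iter(lambda: f.read(4096), b""):
                digest.update(block)
        return digest.hexdigest()

    # Example (hypothetical file name):
    # print(sha384_hex("package_dnnl.sh"))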