10 changes: 4 additions & 6 deletions src/guidellm/config.py
@@ -32,9 +32,9 @@ class Environment(str, Enum):

ENV_REPORT_MAPPING = {
Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
-    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.LOCAL: "tests/dummy/report.html",
+    Environment.STAGING: "https://review.neuralmagic.com/guidellm-ui/staging/index.html",
+    Environment.DEV: "https://review.neuralmagic.com/guidellm-ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
}


@@ -112,8 +112,6 @@ class ReportGenerationSettings(BaseModel):
"""

source: str = ""
-    report_html_match: str = "window.report_data = {};"
-    report_html_placeholder: str = "{}"


class Settings(BaseSettings):
@@ -138,7 +136,7 @@ class Settings(BaseSettings):
)

# general settings
-    env: Environment = Environment.PROD
+    env: Environment = Environment.DEV
request_timeout: int = 60 * 5 # 5 minutes
request_http2: bool = True
max_concurrency: int = 512
16 changes: 16 additions & 0 deletions src/guidellm/core/result.py
@@ -400,6 +400,22 @@ def output_token_throughput(self) -> float:

return output_tokens / self.duration if self.duration else 0.0

+    @property
+    def output_token_throughput_distribution(self) -> Distribution:
+        """
+        Get the distribution for output token throughput.
+
+        :return: The distribution of output token throughput.
+        :rtype: Distribution
+        """
+        throughputs = []
+        for r in self.results:
+            duration = (r.end_time or 0) - (r.start_time or 0)
+            if duration > 0:
+                throughputs.append(r.output_token_count / duration)
+
+        return Distribution(data=throughputs)
+
Comment on lines +404 to +418 (Collaborator Author):

The UI relies on the output token throughput distribution, and I didn't find any existing methods or properties in the tokens/(unit of time) shape the UI expects, so I added this.
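
For reference, a minimal sketch (with made-up numbers) of the shape difference: the existing scalar output_token_throughput aggregates over the whole run, while this property yields one tokens-per-second sample per completed request, which the UI can bucket into a histogram.

from guidellm.core.distribution import Distribution

# Hypothetical per-request data; the property derives the same shape from
# self.results using each result's start/end times and output token count.
output_tokens = [128, 140, 150]  # tokens generated per request (made up)
durations = [1.8, 2.1, 2.4]      # request durations in seconds (made up)

samples = [
    tokens / duration
    for tokens, duration in zip(output_tokens, durations)
    if duration > 0
]
dist = Distribution(data=samples)
print(dist.mean, dist.percentiles([50, 90, 95, 99]))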

@property
def prompt_token_distribution(self) -> Distribution:
"""
6 changes: 5 additions & 1 deletion src/guidellm/main.py
@@ -2,6 +2,7 @@
from typing import Any, Literal, Mapping, Optional, Union, get_args

import click
+from guidellm.utils.injector import create_report
from loguru import logger
from transformers import AutoTokenizer # type: ignore[import-untyped]

@@ -15,6 +16,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
+from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]

@@ -184,7 +186,6 @@ def generate_benchmark_report_cli(
cont_refresh_table=enable_continuous_refresh,
)


def generate_benchmark_report(
target: str,
data: Optional[str],
@@ -290,6 +291,9 @@ def generate_benchmark_report(
)
report = asyncio.run(_run_executor_for_result(executor))

+    js_data = generate_ui_api_data(report)
+    create_report(js_data, 'guidellm_report')
+
# Save and print report
guidance_report = GuidanceReport()
guidance_report.benchmarks.append(report)
3 changes: 3 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,3 +1,4 @@
+from .generate_ui_data import generate_ui_api_data
from .injector import create_report, inject_data
from .progress import BenchmarkReportProgress
from .text import (
@@ -24,6 +25,7 @@
"clean_text",
"create_report",
"filter_text",
"generate_ui_api_data",
"inject_data",
"is_path",
"is_path_like",
@@ -37,4 +39,5 @@
"resolve_transformers_dataset_split",
"split_lines_by_punctuation",
"split_text",
"stretch_list",
]
188 changes: 188 additions & 0 deletions src/guidellm/utils/generate_ui_data.py
@@ -0,0 +1,188 @@
import json
import os
import random
from typing import Any, Dict, List

from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
from guidellm.core.distribution import Distribution

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18) -> Dict[str, Any]:
    total = len(dist.data)
mean = dist.mean
median = dist.median
minv = dist.min
maxv = dist.max
std_dev = dist.std_deviation

pvals = dist.percentiles([50, 90, 95, 99])

percentile_list = [
{"percentile": "p50", "value": pvals[0]},
{"percentile": "p90", "value": pvals[1]},
{"percentile": "p95", "value": pvals[2]},
{"percentile": "p99", "value": pvals[3]},
]

if dist.range == 0:
buckets = [{"value": minv, "count": total}]
bucket_width = 0
else:
bucket_width = dist.range / n_buckets
bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx >= n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

buckets = []
for i, count in enumerate(bucket_counts):
bucket_start = minv + i * bucket_width
buckets.append({
"value": bucket_start,
"count": count
})
Comment on lines +30 to +46 (Collaborator Author):

I'm not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it.

This code assumes a fixed number of buckets and derives the bucket width from that. It's a hard-coded approach, and some up-front data analysis might suggest a better bucket count or width. But with a fixed number of buckets, the histograms in the UI all look the same and take up a comfortable amount of space.
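
If a data-driven bucket count is wanted later, one option is the Freedman-Diaconis rule, which derives the bin width from the interquartile range and sample size. A rough sketch against the existing Distribution API (suggest_bucket_count is a hypothetical helper, not part of this PR):

import math

from guidellm.core.distribution import Distribution

def suggest_bucket_count(dist: Distribution, max_buckets: int = 50) -> int:
    # Freedman-Diaconis: bin width = 2 * IQR / n^(1/3); count = range / width
    n = len(dist.data)
    if n < 2 or dist.range == 0:
        return 1
    q1, q3 = dist.percentiles([25, 75])
    iqr = q3 - q1
    if iqr == 0:
        return 1
    width = 2 * iqr / (n ** (1 / 3))
    return max(1, min(max_buckets, math.ceil(dist.range / width)))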


return {
metric_label: {
"statistics": {
"total": total,
"mean": mean,
"median": median,
"min": minv,
"max": maxv,
"std": std_dev,
},
"percentiles": percentile_list,
"buckets": buckets,
"bucketWidth": bucket_width,
}
}

def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None)
return {
"model": {
"name": report.args.get('model', 'N/A'),
"size": 0
},
"task": "N/A",
"dataset": {
"name": "N/A"
},
"timestamp": timestamp
}

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
    filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks)
    sorted_benchmarks = sorted(filtered_benchmarks, key=lambda bm: bm.start_time)
min_start_time = sorted_benchmarks[0].start_time

all_request_times = [
result.start_time - min_start_time
for benchmark in sorted_benchmarks
for result in benchmark.results
if result.start_time is not None
]

request_distribution = Distribution(data=all_request_times)
final_result = generate_metric_report(request_distribution, "requestsOverTime")
return { "numBenchmarks": len(sorted_benchmarks), **final_result }


def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data]
all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data]
all_output_token_distribution = Distribution(data=all_output_token_data)

prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")

prompt_token_samples = [result.request.prompt for benchmark in benchmarks for result in benchmark.results]
output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results]

num_samples = min(5, len(prompt_token_samples), len(output_token_samples))
sample_indices = random.sample(range(len(prompt_token_samples)), num_samples)

sample_prompts = [prompt_token_samples[i] for i in sample_indices]
"""
Need a wholistic approach to parsing out characters in the prompt that don't covert well into the format we need
"""
sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts))

sample_outputs = [output_token_samples[i] for i in sample_indices]
sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs))

request_over_time_results = generate_request_over_time_data(benchmarks)

return {
"prompts": {
"samples": sample_prompts,
**prompt_token_data
},
"generations": {
"samples": sample_outputs,
**output_token_data
},
"requestsOverTime": request_over_time_results,
"rateType": report.args["mode"],
"server": {
"target": report.args.get('target', 'N/A')
}
}

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=bm.ttft_distribution.data)
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    itl_dist_ms = Distribution(data=bm.itl_distribution.data)
    itl_data = generate_metric_report(itl_dist_ms, 'tpot')
    throughput_dist = Distribution(data=bm.output_token_throughput_distribution.data)
    throughput_data = generate_metric_report(throughput_dist, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **itl_data,
        **ttft_data,
        **throughput_data,
        **latency_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
benchmark_json = []
for benchmark in benchmarks:
benchmarks_report = generate_benchmark_json(benchmark)
benchmark_json.append(benchmarks_report)

return { "benchmarks": benchmark_json }

def generate_js_variable(variable_name: str, data: dict) -> str:
json_data = json.dumps(data, indent=2)
return f'window.{variable_name} = {json_data};'

def generate_ui_api_data(report: TextGenerationBenchmarkReport) -> Dict[str, str]:
filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks))
run_info_data = generate_run_info(report, filtered_benchmarks)
workload_details_data = generate_workload_details(report, filtered_benchmarks)
benchmarks_data = generate_benchmarks_json(filtered_benchmarks)
run_info_script = generate_js_variable("run_info", run_info_data)
workload_details_script = generate_js_variable("workload_details", workload_details_data)
benchmarks_script = generate_js_variable("benchmarks", benchmarks_data)

os.makedirs("ben_test", exist_ok=True)
    # Generate JS files based on the API specs (https://codepen.io/dalthecow/pen/bNGVQbq) for consumption by the UI.
with open("ben_test/run_info.js", "w") as f:
f.write(run_info_script)
with open("ben_test/workload_details.js", "w") as f:
f.write(workload_details_script)
with open("ben_test/benchmarks.js", "w") as f:
f.write(benchmarks_script)

return {
"window.run_info = {};": run_info_script,
"window.workload_details = {};": workload_details_script,
"window.benchmarks = {};": benchmarks_script,
}
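
For context on how this return value is consumed: each key is a placeholder string expected to appear verbatim in the HTML template, and inject_data (see injector.py below) swaps it for the populated script. A rough round-trip sketch, with a hypothetical template and made-up values (the real template comes from settings.report_generation.source):

from guidellm.utils.injector import inject_data

# Hypothetical template containing the three placeholders.
html = (
    "<html><head>\n"
    "<script>window.run_info = {};</script>\n"
    "<script>window.workload_details = {};</script>\n"
    "<script>window.benchmarks = {};</script>\n"
    "</head><body></body></html>"
)

js_data = {
    "window.run_info = {};": 'window.run_info = {"model": {"name": "example-model"}};',
    "window.workload_details = {};": 'window.workload_details = {"rateType": "sweep"};',
    "window.benchmarks = {};": 'window.benchmarks = {"benchmarks": []};',
}
print(inject_data(js_data, html))  # each placeholder replaced with populated data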
41 changes: 14 additions & 27 deletions src/guidellm/utils/injector.py
@@ -1,20 +1,18 @@
from pathlib import Path
from typing import Union

-from pydantic import BaseModel
-
from guidellm.config import settings
from guidellm.utils.text import load_text

__all__ = ["create_report", "inject_data"]


-def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
+def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
"""
-    Creates a report from the model and saves it to the output path.
+    Creates a report from the dictionary and saves it to the output path.

-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: dict with match str and json data to inject
+    :type js_data: dict
:param output_path: the path, either a file or a directory,
to save the report to. If a directory, the report will be saved
as "report.html" inside of the directory.
@@ -27,10 +25,8 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

html_content = load_text(settings.report_generation.source)
report_content = inject_data(
-        model,
+        js_data,
html_content,
-        settings.report_generation.report_html_match,
-        settings.report_generation.report_html_placeholder,
)

if not output_path.suffix:
@@ -39,32 +35,23 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(report_content)

+    print(f'Report saved to {output_path}')
return output_path


def inject_data(
-    model: BaseModel,
+    js_data: dict,
    html: str,
-    match: str,
-    placeholder: str,
) -> str:
"""
-    Injects the data from the model into the HTML while replacing the placeholder.
+    Injects the json data into the HTML while replacing the placeholder.

-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: the json data to inject
+    :type js_data: dict
:param html: the html to inject the data into
:type html: str
-    :param match: the string to match in the html to find the placeholder
-    :type match: str
-    :param placeholder: the placeholder to replace with the model data
-        inside of the placeholder
-    :type placeholder: str
-    :return: the html with the model data injected
+    :return: the html with the json data injected
:rtype: str
"""
-    model_str = model.json()
-    inject_str = match.replace(placeholder, model_str)
-
-    return html.replace(match, inject_str)
+    for placeholder, script in js_data.items():
+        html = html.replace(placeholder, script)
+    return html
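
One possible hardening (a sketch only, not part of this PR): the loop above silently no-ops if a placeholder is missing from the template, so a logged warning would make a mismatched template or data dict easier to spot. inject_data_checked is a hypothetical variant; loguru is already a guidellm dependency.

from loguru import logger

def inject_data_checked(js_data: dict, html: str) -> str:
    for placeholder, script in js_data.items():
        if placeholder not in html:
            # Surface silent no-ops instead of shipping a report with empty data.
            logger.warning("Placeholder not found in HTML template: {!r}", placeholder)
            continue
        html = html.replace(placeholder, script)
    return html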