10 changes: 4 additions & 6 deletions src/guidellm/config.py
@@ -32,9 +32,9 @@ class Environment(str, Enum):

ENV_REPORT_MAPPING = {
Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
-    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.LOCAL: "tests/dummy/report.html",
+    Environment.STAGING: "https://review.neuralmagic.com/guidellm-ui/staging/index.html",
+    Environment.DEV: "https://review.neuralmagic.com/guidellm-ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
}


@@ -112,8 +112,6 @@ class ReportGenerationSettings(BaseModel):
"""

source: str = ""
-    report_html_match: str = "window.report_data = {};"
-    report_html_placeholder: str = "{}"


class Settings(BaseSettings):
@@ -138,7 +136,7 @@ class Settings(BaseSettings):
)

# general settings
-    env: Environment = Environment.PROD
+    env: Environment = Environment.DEV
request_timeout: int = 60 * 5 # 5 minutes
request_http2: bool = True
max_concurrency: int = 512
16 changes: 16 additions & 0 deletions src/guidellm/core/result.py
@@ -400,6 +400,22 @@ def output_token_throughput(self) -> float:

return output_tokens / self.duration if self.duration else 0.0

+    @property
+    def output_token_throughput_distribution(self) -> Distribution:
+        """
+        Get the distribution for output token throughput.
+
+        :return: The distribution of output token throughput.
+        :rtype: Distribution
+        """
+        throughputs = []
+        for r in self.results:
+            duration = (r.end_time or 0) - (r.start_time or 0)
+            if duration > 0:
+                throughputs.append(r.output_token_count / duration)
+
+        return Distribution(data=throughputs)
+
Comment on lines +404 to +418 (Collaborator Author):

The UI relies on the output token throughput distribution, and I didn't find any existing methods or properties in the tokens/(unit of time) shape the UI expects, so I added this.
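
For reference, a minimal sketch (with made-up numbers) of the shape difference: the existing scalar output_token_throughput aggregates over the whole run, while this property yields one tokens-per-second sample per completed request, which the UI can bucket into a histogram.

from guidellm.core.distribution import Distribution

# Hypothetical per-request data; the property derives the same shape from
# self.results using each result's start/end times and output token count.
output_tokens = [128, 140, 150]  # tokens generated per request (made up)
durations = [1.8, 2.1, 2.4]      # request durations in seconds (made up)

samples = [
    tokens / duration
    for tokens, duration in zip(output_tokens, durations)
    if duration > 0
]
dist = Distribution(data=samples)
print(dist.mean, dist.percentiles([50, 90, 95, 99]))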

@property
def prompt_token_distribution(self) -> Distribution:
"""
6 changes: 5 additions & 1 deletion src/guidellm/main.py
@@ -2,6 +2,7 @@
from typing import Any, Literal, Mapping, Optional, Union, get_args

import click
+from guidellm.utils.injector import create_report
from loguru import logger
from transformers import AutoTokenizer # type: ignore[import-untyped]

@@ -15,6 +16,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
+from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]

@@ -184,7 +186,6 @@ def generate_benchmark_report_cli(
cont_refresh_table=enable_continuous_refresh,
)


def generate_benchmark_report(
target: str,
data: Optional[str],
@@ -290,6 +291,9 @@ def generate_benchmark_report(
)
report = asyncio.run(_run_executor_for_result(executor))

+    js_data = generate_ui_api_data(report)
+    create_report(js_data, 'guidellm_report')
+
# Save and print report
guidance_report = GuidanceReport()
guidance_report.benchmarks.append(report)
3 changes: 3 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,3 +1,4 @@
+from .generate_ui_data import generate_ui_api_data
from .injector import create_report, inject_data
from .progress import BenchmarkReportProgress
from .text import (
@@ -24,6 +25,7 @@
"clean_text",
"create_report",
"filter_text",
"generate_ui_api_data",
"inject_data",
"is_path",
"is_path_like",
@@ -37,4 +39,5 @@
"resolve_transformers_dataset_split",
"split_lines_by_punctuation",
"split_text",
"stretch_list",
]
188 changes: 188 additions & 0 deletions src/guidellm/utils/generate_ui_data.py
@@ -0,0 +1,188 @@
import json
import os
import random
from typing import Any, Dict, List

from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
from guidellm.core.distribution import Distribution

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18) -> Dict[str, Any]:
    total = len(dist.data)
mean = dist.mean
median = dist.median
minv = dist.min
maxv = dist.max
std_dev = dist.std_deviation

pvals = dist.percentiles([50, 90, 95, 99])

percentile_list = [
{"percentile": "p50", "value": pvals[0]},
{"percentile": "p90", "value": pvals[1]},
{"percentile": "p95", "value": pvals[2]},
{"percentile": "p99", "value": pvals[3]},
]

if dist.range == 0:
buckets = [{"value": minv, "count": total}]
bucket_width = 0
else:
bucket_width = dist.range / n_buckets
bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx >= n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

buckets = []
for i, count in enumerate(bucket_counts):
bucket_start = minv + i * bucket_width
buckets.append({
"value": bucket_start,
"count": count
})
Comment on lines +30 to +46 (Collaborator Author):

I'm not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it.

This code assumes a fixed number of buckets and derives the bucket width from that. It's a hard-coded approach, and some up-front data analysis might suggest a better bucket count or width. But with a fixed number of buckets, the histograms in the UI all look the same and take up a comfortable amount of space.
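
If a data-driven bucket count is wanted later, one option is the Freedman-Diaconis rule, which derives the bin width from the interquartile range and sample size. A rough sketch against the existing Distribution API (suggest_bucket_count is a hypothetical helper, not part of this PR):

import math

from guidellm.core.distribution import Distribution

def suggest_bucket_count(dist: Distribution, max_buckets: int = 50) -> int:
    # Freedman-Diaconis: bin width = 2 * IQR / n^(1/3); count = range / width
    n = len(dist.data)
    if n < 2 or dist.range == 0:
        return 1
    q1, q3 = dist.percentiles([25, 75])
    iqr = q3 - q1
    if iqr == 0:
        return 1
    width = 2 * iqr / (n ** (1 / 3))
    return max(1, min(max_buckets, math.ceil(dist.range / width)))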


return {
metric_label: {
"statistics": {
"total": total,
"mean": mean,
"median": median,
"min": minv,
"max": maxv,
"std": std_dev,
},
"percentiles": percentile_list,
"buckets": buckets,
"bucketWidth": bucket_width,
}
}

def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None)
return {
"model": {
"name": report.args.get('model', 'N/A'),
"size": 0
},
"task": "N/A",
"dataset": {
"name": "N/A"
},
"timestamp": timestamp
}

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
    filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks)
    sorted_benchmarks = sorted(filtered_benchmarks, key=lambda bm: bm.start_time)
min_start_time = sorted_benchmarks[0].start_time

all_request_times = [
result.start_time - min_start_time
for benchmark in sorted_benchmarks
for result in benchmark.results
if result.start_time is not None
]

request_distribution = Distribution(data=all_request_times)
final_result = generate_metric_report(request_distribution, "requestsOverTime")
return { "numBenchmarks": len(sorted_benchmarks), **final_result }


def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data]
all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data]
all_output_token_distribution = Distribution(data=all_output_token_data)

prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")

prompt_token_samples = [result.request.prompt for benchmark in benchmarks for result in benchmark.results]
output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results]

num_samples = min(5, len(prompt_token_samples), len(output_token_samples))
sample_indices = random.sample(range(len(prompt_token_samples)), num_samples)

sample_prompts = [prompt_token_samples[i] for i in sample_indices]
"""
Need a wholistic approach to parsing out characters in the prompt that don't covert well into the format we need
"""
sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts))

sample_outputs = [output_token_samples[i] for i in sample_indices]
sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs))

request_over_time_results = generate_request_over_time_data(benchmarks)

return {
"prompts": {
"samples": sample_prompts,
**prompt_token_data
},
"generations": {
"samples": sample_outputs,
**output_token_data
},
"requestsOverTime": request_over_time_results,
"rateType": report.args["mode"],
"server": {
"target": report.args.get('target', 'N/A')
}
}

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=bm.ttft_distribution.data)
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    itl_dist_ms = Distribution(data=bm.itl_distribution.data)
    itl_data = generate_metric_report(itl_dist_ms, 'tpot')
    throughput_dist = Distribution(data=bm.output_token_throughput_distribution.data)
    throughput_data = generate_metric_report(throughput_dist, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **itl_data,
        **ttft_data,
        **throughput_data,
        **latency_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
benchmark_json = []
for benchmark in benchmarks:
benchmarks_report = generate_benchmark_json(benchmark)
benchmark_json.append(benchmarks_report)

return { "benchmarks": benchmark_json }

def generate_js_variable(variable_name: str, data: dict) -> str:
json_data = json.dumps(data, indent=2)
return f'window.{variable_name} = {json_data};'

def generate_ui_api_data(report: TextGenerationBenchmarkReport) -> Dict[str, str]:
filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks))
run_info_data = generate_run_info(report, filtered_benchmarks)
workload_details_data = generate_workload_details(report, filtered_benchmarks)
benchmarks_data = generate_benchmarks_json(filtered_benchmarks)
run_info_script = generate_js_variable("run_info", run_info_data)
workload_details_script = generate_js_variable("workload_details", workload_details_data)
benchmarks_script = generate_js_variable("benchmarks", benchmarks_data)

os.makedirs("ben_test", exist_ok=True)
    # Generate JS files based on the API specs (https://codepen.io/dalthecow/pen/bNGVQbq) for consumption by the UI.
with open("ben_test/run_info.js", "w") as f:
f.write(run_info_script)
with open("ben_test/workload_details.js", "w") as f:
f.write(workload_details_script)
with open("ben_test/benchmarks.js", "w") as f:
f.write(benchmarks_script)

return {
"window.run_info = {};": run_info_script,
"window.workload_details = {};": workload_details_script,
"window.benchmarks = {};": benchmarks_script,
}
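
For context on how this return value is consumed: each key is a placeholder string expected to appear verbatim in the HTML template, and inject_data (see injector.py below) swaps it for the populated script. A rough round-trip sketch, with a hypothetical template and made-up values (the real template comes from settings.report_generation.source):

from guidellm.utils.injector import inject_data

# Hypothetical template containing the three placeholders.
html = (
    "<html><head>\n"
    "<script>window.run_info = {};</script>\n"
    "<script>window.workload_details = {};</script>\n"
    "<script>window.benchmarks = {};</script>\n"
    "</head><body></body></html>"
)

js_data = {
    "window.run_info = {};": 'window.run_info = {"model": {"name": "example-model"}};',
    "window.workload_details = {};": 'window.workload_details = {"rateType": "sweep"};',
    "window.benchmarks = {};": 'window.benchmarks = {"benchmarks": []};',
}
print(inject_data(js_data, html))  # each placeholder replaced with populated data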
41 changes: 14 additions & 27 deletions src/guidellm/utils/injector.py
@@ -1,20 +1,18 @@
from pathlib import Path
from typing import Union

-from pydantic import BaseModel
-
from guidellm.config import settings
from guidellm.utils.text import load_text

__all__ = ["create_report", "inject_data"]


-def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
+def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
"""
-    Creates a report from the model and saves it to the output path.
+    Creates a report from the dictionary and saves it to the output path.

-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: dict with match str and json data to inject
+    :type js_data: dict
:param output_path: the path, either a file or a directory,
to save the report to. If a directory, the report will be saved
as "report.html" inside of the directory.
@@ -27,10 +25,8 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

html_content = load_text(settings.report_generation.source)
report_content = inject_data(
-        model,
+        js_data,
html_content,
-        settings.report_generation.report_html_match,
-        settings.report_generation.report_html_placeholder,
)

if not output_path.suffix:
@@ -39,32 +35,23 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(report_content)

+    print(f'Report saved to {output_path}')
return output_path


def inject_data(
-    model: BaseModel,
+    js_data: dict,
    html: str,
-    match: str,
-    placeholder: str,
) -> str:
"""
-    Injects the data from the model into the HTML while replacing the placeholder.
+    Injects the json data into the HTML while replacing the placeholder.

-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: the json data to inject
+    :type js_data: dict
:param html: the html to inject the data into
:type html: str
-    :param match: the string to match in the html to find the placeholder
-    :type match: str
-    :param placeholder: the placeholder to replace with the model data
-        inside of the placeholder
-    :type placeholder: str
-    :return: the html with the model data injected
+    :return: the html with the json data injected
:rtype: str
"""
-    model_str = model.json()
-    inject_str = match.replace(placeholder, model_str)
-
-    return html.replace(match, inject_str)
+    for placeholder, script in js_data.items():
+        html = html.replace(placeholder, script)
+    return html
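
One possible hardening (a sketch only, not part of this PR): the loop above silently no-ops if a placeholder is missing from the template, so a logged warning would make a mismatched template or data dict easier to spot. inject_data_checked is a hypothetical variant; loguru is already a guidellm dependency.

from loguru import logger

def inject_data_checked(js_data: dict, html: str) -> str:
    for placeholder, script in js_data.items():
        if placeholder not in html:
            # Surface silent no-ops instead of shipping a report with empty data.
            logger.warning("Placeholder not found in HTML template: {!r}", placeholder)
            continue
        html = html.replace(placeholder, script)
    return html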