diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index 6de12a84de..f37359e26f 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -11,6 +11,58 @@ ) +PYTORCH_HELION_CONFIG = BenchmarkConfig( + name="Helion Benchmark Regression", + id="pytorch_helion", + source=BenchmarkApiSource( + api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series", + type="benchmark_time_series_api", + api_endpoint_params_template=""" + { + "name": "pytorch_helion", + "query_params": { + "mode": "", + "branches": ["main"], + "repo": "pytorch/helion", + "device": "", + "arch":"", + "benchmarkName": "Helion Benchmark", + "startTime": "{{ startTime }}", + "stopTime": "{{ stopTime }}" + }, + "response_formats":["time_series"] + } + """, + ), + hud_info={ + "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion", + }, + # set baseline from past 4-8 days, and compare with the latest 4 days + policy=Policy( + frequency=Frequency(value=1, unit="days"), + range=RangeConfig( + baseline=DayRangeWindow(value=4), + comparison=DayRangeWindow(value=4), + ), + metrics={ + "helion_speedup": RegressionPolicy( + name="helion_speedup", + condition="greater_equal", + threshold=0.85, + baseline_aggregation="median", + ), + }, + notification_config={ + "type": "github", + "repo": "pytorch/test-infra", + "issue": "7472", + }, + ), + report_config=ReportConfig( + report_level="insufficient_data", + ), +) + PYTORCH_OPERATOR_MICROBENCH_CONFIG = BenchmarkConfig( name="Pytorch Operator Microbench Regression", id="pytorch_operator_microbenchmark", @@ -146,6 +198,7 @@ configs={ "compiler_regression": COMPILER_BENCHMARK_CONFIG, "pytorch_operator_microbenchmark": PYTORCH_OPERATOR_MICROBENCH_CONFIG, + "pytorch_helion": PYTORCH_HELION_CONFIG, } ) diff --git 
a/aws/lambda/benchmark_regression_summary_report/common/regression_utils.py b/aws/lambda/benchmark_regression_summary_report/common/regression_utils.py index 96c89ce61e..4ba306c2bf 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/regression_utils.py +++ b/aws/lambda/benchmark_regression_summary_report/common/regression_utils.py @@ -77,16 +77,18 @@ class BenchmarkRegressionReport(TypedDict): def get_regression_status(regression_summary: BenchmarkRegressionSummary) -> str: - status = ( - "regression" - if regression_summary.get("regression_count", 0) > 0 - else ( - "suspicious" - if regression_summary.get("suspicious_count", 0) > 0 - else "no_regression" - ) - ) - return status + if regression_summary.get("regression_count", 0) > 0: + return "regression" + if regression_summary.get("suspicious_count", 0) > 0: + return "suspicious" + if regression_summary.get("insufficient_data_count", 0) > 0: + insufficient_data = regression_summary.get("insufficient_data_count", 0) + # default to 1 to avoid a division-by-zero issue + total = regression_summary.get("total_count", 1) + percentage = insufficient_data / total + if percentage >= 0.9: + return "insufficient_data" + return "no_regression" class BenchmarkRegressionReportGenerator: @@ -251,7 +253,19 @@ def _to_data_map( for d in sorted( ts_group.data, key=lambda d: isoparse(d["granularity_bucket"]) ): + # skip if field is not in data, or field is None if field not in d: + logger.warning( + "[_to_data_map] field %s not found or value is undefined", field + ) + continue + if d[field] is None or math.isnan(float(d[field])): + logger.warning( + "[_to_data_map] Skip %s with value %s with group key [%s]", + field, + d[field], + group_keys, + ) continue p: BenchmarkRegressionPoint = { diff --git a/torchci/components/benchmark_v3/configs/configurations.tsx b/torchci/components/benchmark_v3/configs/configurations.tsx index 6165ab21af..3935e31973 100644 --- a/torchci/components/benchmark_v3/configs/configurations.tsx +++ 
b/torchci/components/benchmark_v3/configs/configurations.tsx @@ -38,7 +38,7 @@ export const BENCHMARK_ID_MAPPING: Record = { * @returns */ export function getBenchmarkIdFromReportId(reportId: string): string { - return REPORT_ID_TO_BENCHMARK_ID_MAPPING[reportId] ?? ""; + return REPORT_ID_TO_BENCHMARK_ID_MAPPING[reportId] ?? reportId; } export function getBenchmarkIdMappingItem( diff --git a/torchci/components/benchmark_v3/pages/BenchmarkListPage.tsx b/torchci/components/benchmark_v3/pages/BenchmarkListPage.tsx index d00a45c0ca..2eff527984 100644 --- a/torchci/components/benchmark_v3/pages/BenchmarkListPage.tsx +++ b/torchci/components/benchmark_v3/pages/BenchmarkListPage.tsx @@ -12,7 +12,8 @@ export function getBenchmarkMainRouteById(id: string): string | undefined { } } } - return undefined; + // by default, form the v3 route to dashboard page + return `/benchmark/v3/dashboard/${id}`; } export function benchmarkCategoryCardToNavGroup(