Commit 4d45364

Fix float format
1 parent df5136e commit 4d45364

1 file changed: +10 −6 lines

1 file changed

+10
-6
lines changed

scripts/make_leaderboard.py

Lines changed: 10 additions & 6 deletions
@@ -50,8 +50,10 @@ def load_evaluation_data(result_dir: str, model: str, task_dirs: List[str]) -> d
         if metric not in eval_mm.ScorerRegistry.get_metric_list():
             logger.warning(f"Skipping unsupported metric: {metric}")
             continue
-
-        model_results[f"{task_dir}/{metric}"] = aggregate_output["overall_score"]
+        overall_score = aggregate_output["overall_score"]
+        if metric in ["jdocqa", "jmmmu", "jic-vqa", "mecha-ja", "mmmu"]:
+            overall_score = overall_score * 100
+        model_results[f"{task_dir}/{metric}"] = overall_score

     return model_results

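The new branch rescales a handful of metrics before they are stored, presumably because those scorers report accuracy in [0, 1] while the rest of the leaderboard columns are already percent-scale. A standalone sketch of the effect (the dict literal and concrete values are invented for illustration; only the metric list comes from the diff):

    # Rescaling step in isolation; the values are made up, the metric list
    # mirrors the diff. 0.8125 is chosen so the product is float-exact.
    aggregate_output = {"overall_score": 0.8125}
    metric = "jmmmu"

    overall_score = aggregate_output["overall_score"]
    if metric in ["jdocqa", "jmmmu", "jic-vqa", "mecha-ja", "mmmu"]:
        overall_score = overall_score * 100  # fraction -> percent

    print(overall_score)  # 81.25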
@@ -169,8 +171,8 @@ def format_output(df: pd.DataFrame, output_format: str) -> str:
     for col in df.columns:
         top1_model = df[col].astype(float).idxmax()
         top2_model = df[col].astype(float).nlargest(2).index[-1]
-        top1_score = f"{float(df.loc[top1_model, col]):.3g}"
-        top2_score = f"{float(df.loc[top2_model, col]):.3g}"
+        top1_score = f"{float(df.loc[top1_model, col]):.1f}"
+        top2_score = f"{float(df.loc[top2_model, col]):.1f}"
         # apply formatting
         if output_format == "latex":
             df.loc[top1_model, col] = f"\\textbf{{{top1_score}}}"
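Switching the top-1/top-2 highlight strings from :.3g to :.1f is what keeps them consistent with the table formats below: %.3g counts significant digits, so percent-scale scores render with a varying number of decimals, while %.1f always gives exactly one. A quick standalone comparison (values invented):

    # ".3g" = 3 significant digits, ".1f" = exactly one decimal place.
    for score in (5.123, 81.23, 100.0):
        print(f"{score:.3g}", f"{score:.1f}")
    # 5.12 5.1
    # 81.2 81.2
    # 100 100.0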
@@ -184,9 +186,11 @@ def format_output(df: pd.DataFrame, output_format: str) -> str:
     df = df.fillna("")

     if output_format == "markdown":
-        return df.to_markdown(mode="github", floatfmt=".3g")
+        return df.to_markdown(mode="github", floatfmt=".1f")
     elif output_format == "latex":
-        return df.to_latex(float_format="%.3g")
+        return df.to_latex(
+            float_format="%.1f", column_format="l" + "c" * len(df.columns)
+        )
     return ""

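Besides the precision change, the LaTeX branch now pins the column layout: one left-aligned column for the model index plus a centered column per score. A self-contained sketch of the new call (the frame contents are invented; the to_latex arguments are the ones from the diff):

    import pandas as pd

    # Two invented models and two invented score columns, just to show the
    # shape of the new to_latex() call: "l" for the index, "c" per column.
    df = pd.DataFrame(
        {"jmmmu/acc": [81.2, 54.0], "jdocqa/acc": [63.5, 41.7]},
        index=["model-a", "model-b"],
    )
    print(df.to_latex(float_format="%.1f", column_format="l" + "c" * len(df.columns)))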