Skip to content

Commit 7bfbfc4

Browse files
committed
Fix bug and Add Avg column
1 parent b5736b5 commit 7bfbfc4

File tree

2 files changed

+102
-4
lines changed

2 files changed

+102
-4
lines changed

github_pages/public/leaderboard.json

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
},
3838
"JMMMU": {
3939
"Acc": 0.26
40+
},
41+
"Avg": {
42+
"Avg": 41.98
4043
}
4144
}
4245
},
@@ -78,6 +81,9 @@
7881
},
7982
"JMMMU": {
8083
"Acc": 0.03
84+
},
85+
"Avg": {
86+
"Avg": 45.66
8187
}
8288
}
8389
},
@@ -119,6 +125,9 @@
119125
},
120126
"JMMMU": {
121127
"Acc": 0.36
128+
},
129+
"Avg": {
130+
"Avg": 66.84
122131
}
123132
}
124133
},
@@ -160,6 +169,9 @@
160169
},
161170
"JMMMU": {
162171
"Acc": 0.06
172+
},
173+
"Avg": {
174+
"Avg": 48.99
163175
}
164176
}
165177
},
@@ -201,6 +213,9 @@
201213
},
202214
"JMMMU": {
203215
"Acc": 0.19
216+
},
217+
"Avg": {
218+
"Avg": 71.43
204219
}
205220
}
206221
},
@@ -242,6 +257,9 @@
242257
},
243258
"JMMMU": {
244259
"Acc": 0.39
260+
},
261+
"Avg": {
262+
"Avg": 67.55
245263
}
246264
}
247265
},
@@ -283,6 +301,9 @@
283301
},
284302
"JMMMU": {
285303
"Acc": 0.43
304+
},
305+
"Avg": {
306+
"Avg": 69.84
286307
}
287308
}
288309
},
@@ -324,6 +345,9 @@
324345
},
325346
"JMMMU": {
326347
"Acc": 0.22
348+
},
349+
"Avg": {
350+
"Avg": 40.0
327351
}
328352
}
329353
},
@@ -365,6 +389,9 @@
365389
},
366390
"JMMMU": {
367391
"Acc": 0.3
392+
},
393+
"Avg": {
394+
"Avg": 58.31
368395
}
369396
}
370397
},
@@ -406,6 +433,9 @@
406433
},
407434
"JMMMU": {
408435
"Acc": 0.25
436+
},
437+
"Avg": {
438+
"Avg": 53.61
409439
}
410440
}
411441
},
@@ -447,6 +477,9 @@
447477
},
448478
"JMMMU": {
449479
"Acc": 0.37
480+
},
481+
"Avg": {
482+
"Avg": 74.26
450483
}
451484
}
452485
},
@@ -488,6 +521,9 @@
488521
},
489522
"JMMMU": {
490523
"Acc": 0.19
524+
},
525+
"Avg": {
526+
"Avg": 67.41
491527
}
492528
}
493529
},
@@ -529,6 +565,9 @@
529565
},
530566
"JMMMU": {
531567
"Acc": 0.35
568+
},
569+
"Avg": {
570+
"Avg": 62.73
532571
}
533572
}
534573
},
@@ -543,6 +582,9 @@
543582
"LLM": 3.23,
544583
"Rouge": 12.97
545584
},
585+
"JIC": {
586+
"Acc": 0.58
587+
},
546588
"MECHA": {
547589
"Acc": 0.46
548590
},
@@ -567,6 +609,9 @@
567609
},
568610
"JMMMU": {
569611
"Acc": 0.33
612+
},
613+
"Avg": {
614+
"Avg": 64.57
570615
}
571616
}
572617
},
@@ -608,6 +653,9 @@
608653
},
609654
"JMMMU": {
610655
"Acc": 0.39
656+
},
657+
"Avg": {
658+
"Avg": 66.59
611659
}
612660
}
613661
},
@@ -649,6 +697,9 @@
649697
},
650698
"JMMMU": {
651699
"Acc": 0.39
700+
},
701+
"Avg": {
702+
"Avg": 67.57
652703
}
653704
}
654705
},
@@ -690,6 +741,9 @@
690741
},
691742
"JMMMU": {
692743
"Acc": 0.48
744+
},
745+
"Avg": {
746+
"Avg": 78.59
693747
}
694748
}
695749
},
@@ -731,6 +785,9 @@
731785
},
732786
"JMMMU": {
733787
"Acc": 0.61
788+
},
789+
"Avg": {
790+
"Avg": 87.61
734791
}
735792
}
736793
},
@@ -772,6 +829,9 @@
772829
},
773830
"JMMMU": {
774831
"Acc": 0.37
832+
},
833+
"Avg": {
834+
"Avg": 68.08
775835
}
776836
}
777837
},
@@ -813,6 +873,9 @@
813873
},
814874
"JMMMU": {
815875
"Acc": 0.48
876+
},
877+
"Avg": {
878+
"Avg": 78.04
816879
}
817880
}
818881
},
@@ -854,6 +917,9 @@
854917
},
855918
"JMMMU": {
856919
"Acc": 0.51
920+
},
921+
"Avg": {
922+
"Avg": 78.5
857923
}
858924
}
859925
},
@@ -895,6 +961,9 @@
895961
},
896962
"JMMMU": {
897963
"Acc": 0.39
964+
},
965+
"Avg": {
966+
"Avg": 67.33
898967
}
899968
}
900969
},
@@ -936,6 +1005,9 @@
9361005
},
9371006
"JMMMU": {
9381007
"Acc": 0.57
1008+
},
1009+
"Avg": {
1010+
"Avg": 87.94
9391011
}
9401012
}
9411013
}

scripts/make_leaderboard.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ def process_results(result_dir: str, model_list: List[str]) -> pd.DataFrame:
7575
for k in df.columns
7676
}
7777
)
78+
# Normalize every score to a 100-point scale (each column is divided by its own maximum).
79+
df_normalized = df.apply(lambda x: x / x.max() * 100, axis=0)
80+
81+
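# Compute each model's overall score (the mean of its normalized scores, rounded to 2 decimals) and append it as the last column.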
82+
df["Avg/Avg"] = df_normalized.mean(axis=1).round(2)
83+
7884
return df
7985

8086

@@ -93,6 +99,9 @@ def generate_json_path(df: pd.DataFrame, output_path: str):
9399

94100
for col, score in row.items():
95101
if isinstance(score, (int, float)) and not pd.isna(score):
102+
if "/" not in col:
103+
model_entry["scores"][col] = score
104+
continue
96105
task, metric = col.split("/")
97106
if task not in model_entry["scores"]:
98107
model_entry["scores"][task] = {}
@@ -145,6 +154,23 @@ def plot_task_performance(df: pd.DataFrame):
145154

146155
def format_output(df: pd.DataFrame, output_format: str) -> str:
147156
"""Format the DataFrame output for markdown or LaTeX."""
157+
158+
# Highlight the best score (bold) and the second-best score (underline) in each column.
159+
for col in df.columns:
160+
top1_model = df[col].astype(float).idxmax()
161+
top2_model = df[col].astype(float).nlargest(2).index[-1]
162+
top1_score = f"{float(df.loc[top1_model, col]):.3g}"
163+
top2_score = f"{float(df.loc[top2_model, col]):.3g}"
164+
# apply formatting
165+
if output_format == "latex":
166+
df.loc[top1_model, col] = f"\\textbf{{{top1_score}}}"
167+
df.loc[top2_model, col] = f"\\textit{{{top2_score}}}"
168+
df.loc[top2_model, col] = f"\\underline{{{top2_score}}}"
169+
else:
170+
df.loc[top1_model, col] = f"**{top1_score}**"
171+
df.loc[top2_model, col] = f"*{top2_score}*"
172+
df.loc[top2_model, col] = f"<u>{top2_score}</u>"
173+
148174
df = df.fillna("")
149175
if output_format == "markdown":
150176
return df.to_markdown(mode="github", floatfmt=".3g")
@@ -164,20 +190,20 @@ def main(
164190
):
165191
df = process_results(result_dir, model_list)
166192
if plot_corr:
167-
plot_correlation(df, "correlation.png")
193+
plot_correlation(df.copy(), "correlation.png")
168194
# plot_correlation(df.T, "correlation_model.png")
169195
if plot_bar:
170-
plot_task_performance(df)
196+
plot_task_performance(df.copy())
171197

172-
table = format_output(df, output_format)
198+
table = format_output(df.copy(), output_format)
173199
print(table)
174200

175201
if output_path:
176202
with open(output_path, "w") as f:
177203
f.write(table)
178204

179205
if update_pages:
180-
generate_json_path(df, "github_pages/public/leaderboard.json")
206+
generate_json_path(df.copy(), "github_pages/public/leaderboard.json")
181207

182208

183209
def parse_args():

0 commit comments

Comments
 (0)