
Commit 257b014

Fix result
1 parent: 2350756

2 files changed (+23, -23 lines)


github_pages/public/leaderboard.json

Lines changed: 22 additions & 22 deletions
@@ -7,7 +7,7 @@
       "macro_f1": 0.3
     },
     "CVQA": {
-      "Acc": 0.24
+      "Acc": 23.65
     },
     "Heron": {
       "LLM": 23.53
@@ -95,7 +95,7 @@
       "macro_f1": 10.44
     },
     "CVQA": {
-      "Acc": 0.44
+      "Acc": 43.84
     },
     "Heron": {
       "LLM": 47.59
@@ -142,7 +142,7 @@
       "macro_f1": 11.37
     },
     "CVQA": {
-      "Acc": 0.07
+      "Acc": 6.9
     },
     "Heron": {
       "LLM": 54.1
@@ -189,7 +189,7 @@
       "macro_f1": 11.55
     },
     "CVQA": {
-      "Acc": 0.3
+      "Acc": 29.56
     },
     "Heron": {
       "LLM": 68.03
@@ -236,7 +236,7 @@
       "macro_f1": 8.2
     },
     "CVQA": {
-      "Acc": 0.49
+      "Acc": 48.77
     },
     "Heron": {
       "LLM": 60.45
@@ -283,7 +283,7 @@
       "macro_f1": 23.23
     },
     "CVQA": {
-      "Acc": 0.56
+      "Acc": 56.16
     },
     "Heron": {
       "LLM": 60.15
@@ -368,7 +368,7 @@
       "macro_f1": 14.46
     },
     "CVQA": {
-      "Acc": 0.41
+      "Acc": 40.89
     },
     "Heron": {
       "LLM": 43.14
@@ -415,7 +415,7 @@
       "macro_f1": 20.12
     },
     "CVQA": {
-      "Acc": 0.27
+      "Acc": 26.6
     },
     "Heron": {
       "LLM": 30.04
@@ -462,7 +462,7 @@
       "macro_f1": 18.74
     },
     "CVQA": {
-      "Acc": 0.48
+      "Acc": 48.28
     },
     "Heron": {
       "LLM": 56.97
@@ -509,7 +509,7 @@
       "macro_f1": 27.17
     },
     "CVQA": {
-      "Acc": 0.49
+      "Acc": 49.26
     },
     "Heron": {
       "LLM": 60.88
@@ -556,7 +556,7 @@
       "macro_f1": 27.71
     },
     "CVQA": {
-      "Acc": 0.52
+      "Acc": 51.72
     },
     "Heron": {
       "LLM": 38.08
@@ -603,7 +603,7 @@
       "macro_f1": 20.56
     },
     "CVQA": {
-      "Acc": 0.44
+      "Acc": 43.84
     },
     "Heron": {
       "LLM": 46.93
@@ -650,7 +650,7 @@
       "macro_f1": 37.33
     },
     "CVQA": {
-      "Acc": 0.48
+      "Acc": 48.28
     },
     "Heron": {
       "LLM": 49.82
@@ -697,7 +697,7 @@
       "macro_f1": 38.51
     },
     "CVQA": {
-      "Acc": 0.5
+      "Acc": 50.25
     },
     "Heron": {
       "LLM": 59.69
@@ -744,7 +744,7 @@
       "macro_f1": 74.77
     },
     "CVQA": {
-      "Acc": 0.56
+      "Acc": 55.67
     },
     "Heron": {
       "LLM": 70.29
@@ -832,7 +832,7 @@
       "macro_f1": 77.14
     },
     "CVQA": {
-      "Acc": 0.69
+      "Acc": 69.46
     },
     "Heron": {
       "LLM": 85.46
@@ -879,7 +879,7 @@
       "macro_f1": 55.49
     },
     "CVQA": {
-      "Acc": 0.49
+      "Acc": 49.26
     },
     "Heron": {
       "LLM": 52.83
@@ -926,7 +926,7 @@
       "macro_f1": 64.92
     },
     "CVQA": {
-      "Acc": 0.59
+      "Acc": 58.62
     },
     "Heron": {
       "LLM": 72.19
@@ -973,7 +973,7 @@
       "macro_f1": 66.92
     },
     "CVQA": {
-      "Acc": 0.63
+      "Acc": 62.56
     },
     "Heron": {
       "LLM": 69.15
@@ -1020,7 +1020,7 @@
       "macro_f1": 42.48
     },
     "CVQA": {
-      "Acc": 0.37
+      "Acc": 37.44
     },
     "Heron": {
       "LLM": 45.52
@@ -1067,7 +1067,7 @@
       "macro_f1": 64.11
     },
     "CVQA": {
-      "Acc": 0.82
+      "Acc": 82.27
     },
     "Heron": {
       "LLM": 93.7
@@ -1114,7 +1114,7 @@
       "macro_f1": 44.54
     },
     "CVQA": {
-      "Acc": 0.61
+      "Acc": 60.59
     }
   }
 }
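
Every hunk above applies the same unit fix: each model's CVQA "Acc" moves from a 0-1 fraction to a percentage, and each old value agrees with the new one to within two-decimal rounding (0.24 vs. 23.65, 0.07 vs. 6.9, and so on). Below is a quick consistency check over the old/new pairs copied from the hunks; the snippet is illustrative only and is not part of this commit.

import math

# (old fractional "Acc", new percentage "Acc"), copied from the hunks above
pairs = [
    (0.24, 23.65), (0.44, 43.84), (0.07, 6.9), (0.3, 29.56),
    (0.49, 48.77), (0.56, 56.16), (0.41, 40.89), (0.27, 26.6),
    (0.48, 48.28), (0.49, 49.26), (0.52, 51.72), (0.44, 43.84),
    (0.48, 48.28), (0.5, 50.25), (0.56, 55.67), (0.69, 69.46),
    (0.49, 49.26), (0.59, 58.62), (0.63, 62.56), (0.37, 37.44),
    (0.82, 82.27), (0.61, 60.59),
]

# Each old value should equal the new one divided by 100, to within half a
# unit in the second decimal place (the precision of the old format).
assert all(math.isclose(old, new / 100, abs_tol=0.005) for old, new in pairs)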

scripts/make_leaderboard.py

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ def load_evaluation_data(result_dir: str, model: str, task_dirs: list[str]) -> d
         "mecha-ja",
         "mmmu",
         "cc-ocr",
-        "cvqa",
+        "substring-match",
     ]:
         overall_score = overall_score * 100
     model_results[f"{task_dir}/{metric}"] = overall_score
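
The guard that opens this list sits just above the hunk's context, so its exact form is not visible in the diff. A negated membership test would reconcile this change with the leaderboard.json hunks above: once "cvqa" leaves the list, CVQA scores pass through the * 100 branch and land as percentages (0.24-style fractions become 23.65-style values), while "substring-match" scores keep their raw scale. A minimal sketch under that assumption follows; the list name, the helper, and the "not in" guard are all hypothetical, not code from this repository.

# Hypothetical reconstruction of the branch around the hunk above.
# ASSUMPTION: the membership test is negated ("not in"); that reading is
# inferred from, not confirmed by, the fraction-to-percentage change
# visible in leaderboard.json.
KEEP_RAW_SCALE = [
    "mecha-ja",
    "mmmu",
    "cc-ocr",
    "substring-match",
]

def to_leaderboard_score(task_dir: str, overall_score: float) -> float:
    # Scores for tasks not listed above are 0-1 fractions; convert them
    # to percentages for display on the leaderboard.
    if task_dir not in KEEP_RAW_SCALE:
        overall_score = overall_score * 100
    return overall_score

# e.g. to_leaderboard_score("cvqa", 0.2365) now yields ~23.65, matching
# the value written to github_pages/public/leaderboard.json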
