Skip to content

Commit 2350756

Browse files
committed
Fix results
1 parent 77cc07a commit 2350756

File tree

3 files changed

+32
-23
lines changed

3 files changed

+32
-23
lines changed

github_pages/public/leaderboard.json

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"url": "https://huggingface.co/stabilityai/japanese-instructblip-alpha",
55
"scores": {
66
"CC-OCR": {
7-
"macro_f1": 0.0
7+
"macro_f1": 0.3
88
},
99
"CVQA": {
1010
"Acc": 0.24
@@ -92,7 +92,7 @@
9292
"url": "https://huggingface.co/SakanaAI/Llama-3-EvoVLM-JP-v2",
9393
"scores": {
9494
"CC-OCR": {
95-
"macro_f1": 0.1
95+
"macro_f1": 10.44
9696
},
9797
"CVQA": {
9898
"Acc": 0.44
@@ -139,7 +139,7 @@
139139
"url": "https://huggingface.co/cyberagent/llava-calm2-siglip",
140140
"scores": {
141141
"CC-OCR": {
142-
"macro_f1": 0.11
142+
"macro_f1": 11.37
143143
},
144144
"CVQA": {
145145
"Acc": 0.07
@@ -186,7 +186,7 @@
186186
"url": "https://huggingface.co/llm-jp/llm-jp-3-vila-14b",
187187
"scores": {
188188
"CC-OCR": {
189-
"macro_f1": 0.12
189+
"macro_f1": 11.55
190190
},
191191
"CVQA": {
192192
"Acc": 0.3
@@ -233,7 +233,7 @@
233233
"url": "https://huggingface.co/sbintuitions/sarashina2-vision-8b",
234234
"scores": {
235235
"CC-OCR": {
236-
"macro_f1": 0.08
236+
"macro_f1": 8.2
237237
},
238238
"CVQA": {
239239
"Acc": 0.49
@@ -280,7 +280,7 @@
280280
"url": "https://huggingface.co/sbintuitions/sarashina2-vision-14b",
281281
"scores": {
282282
"CC-OCR": {
283-
"macro_f1": 0.23
283+
"macro_f1": 23.23
284284
},
285285
"CVQA": {
286286
"Acc": 0.56
@@ -365,7 +365,7 @@
365365
"url": "https://huggingface.co/llava-hf/llava-1.5-7b-hf",
366366
"scores": {
367367
"CC-OCR": {
368-
"macro_f1": 0.14
368+
"macro_f1": 14.46
369369
},
370370
"CVQA": {
371371
"Acc": 0.41
@@ -412,7 +412,7 @@
412412
"url": "https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf",
413413
"scores": {
414414
"CC-OCR": {
415-
"macro_f1": 0.2
415+
"macro_f1": 20.12
416416
},
417417
"CVQA": {
418418
"Acc": 0.27
@@ -459,7 +459,7 @@
459459
"url": "https://huggingface.co/neulab/Pangea-7B-hf",
460460
"scores": {
461461
"CC-OCR": {
462-
"macro_f1": 0.19
462+
"macro_f1": 18.74
463463
},
464464
"CVQA": {
465465
"Acc": 0.48
@@ -506,7 +506,7 @@
506506
"url": "https://huggingface.co/mistralai/Pixtral-12B-2409",
507507
"scores": {
508508
"CC-OCR": {
509-
"macro_f1": 0.27
509+
"macro_f1": 27.17
510510
},
511511
"CVQA": {
512512
"Acc": 0.49
@@ -553,7 +553,7 @@
553553
"url": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct",
554554
"scores": {
555555
"CC-OCR": {
556-
"macro_f1": 0.28
556+
"macro_f1": 27.71
557557
},
558558
"CVQA": {
559559
"Acc": 0.52
@@ -600,7 +600,7 @@
600600
"url": "https://huggingface.co/Efficient-Large-Model/VILA1.5-13b",
601601
"scores": {
602602
"CC-OCR": {
603-
"macro_f1": 0.21
603+
"macro_f1": 20.56
604604
},
605605
"CVQA": {
606606
"Acc": 0.44
@@ -647,7 +647,7 @@
647647
"url": "https://huggingface.co/OpenGVLab/InternVL2-8B",
648648
"scores": {
649649
"CC-OCR": {
650-
"macro_f1": 0.37
650+
"macro_f1": 37.33
651651
},
652652
"CVQA": {
653653
"Acc": 0.48
@@ -694,7 +694,7 @@
694694
"url": "https://huggingface.co/OpenGVLab/InternVL2-26B",
695695
"scores": {
696696
"CC-OCR": {
697-
"macro_f1": 0.39
697+
"macro_f1": 38.51
698698
},
699699
"CVQA": {
700700
"Acc": 0.5
@@ -741,7 +741,7 @@
741741
"url": "https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct",
742742
"scores": {
743743
"CC-OCR": {
744-
"macro_f1": 0.75
744+
"macro_f1": 74.77
745745
},
746746
"CVQA": {
747747
"Acc": 0.56
@@ -829,7 +829,7 @@
829829
"url": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct",
830830
"scores": {
831831
"CC-OCR": {
832-
"macro_f1": 0.77
832+
"macro_f1": 77.14
833833
},
834834
"CVQA": {
835835
"Acc": 0.69
@@ -876,7 +876,7 @@
876876
"url": "https://huggingface.co/google/gemma-3-4b-it",
877877
"scores": {
878878
"CC-OCR": {
879-
"macro_f1": 0.55
879+
"macro_f1": 55.49
880880
},
881881
"CVQA": {
882882
"Acc": 0.49
@@ -923,7 +923,7 @@
923923
"url": "https://huggingface.co/google/gemma-3-12b-it",
924924
"scores": {
925925
"CC-OCR": {
926-
"macro_f1": 0.65
926+
"macro_f1": 64.92
927927
},
928928
"CVQA": {
929929
"Acc": 0.59
@@ -970,7 +970,7 @@
970970
"url": "https://huggingface.co/google/gemma-3-27b-it",
971971
"scores": {
972972
"CC-OCR": {
973-
"macro_f1": 0.67
973+
"macro_f1": 66.92
974974
},
975975
"CVQA": {
976976
"Acc": 0.63
@@ -1017,7 +1017,7 @@
10171017
"url": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct",
10181018
"scores": {
10191019
"CC-OCR": {
1020-
"macro_f1": 0.42
1020+
"macro_f1": 42.48
10211021
},
10221022
"CVQA": {
10231023
"Acc": 0.37
@@ -1064,7 +1064,7 @@
10641064
"url": "https://huggingface.co/gpt-4o-2024-11-20",
10651065
"scores": {
10661066
"CC-OCR": {
1067-
"macro_f1": 0.64
1067+
"macro_f1": 64.11
10681068
},
10691069
"CVQA": {
10701070
"Acc": 0.82
@@ -1111,7 +1111,7 @@
11111111
"url": "https://huggingface.co/turing-motors/Heron-NVILA-Lite-15B",
11121112
"scores": {
11131113
"CC-OCR": {
1114-
"macro_f1": 0.45
1114+
"macro_f1": 44.54
11151115
},
11161116
"CVQA": {
11171117
"Acc": 0.61

scripts/make_leaderboard.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,15 @@ def load_evaluation_data(result_dir: str, model: str, task_dirs: list[str]) -> d
9797
logger.warning(f"Skipping unsupported metric: {metric}")
9898
continue
9999
overall_score = aggregate_output["overall_score"]
100-
if metric in ["jdocqa", "jmmmu", "jic-vqa", "mecha-ja", "mmmu"]:
100+
if metric in [
101+
"jdocqa",
102+
"jmmmu",
103+
"jic-vqa",
104+
"mecha-ja",
105+
"mmmu",
106+
"cc-ocr",
107+
"cvqa",
108+
]:
101109
overall_score = overall_score * 100
102110
model_results[f"{task_dir}/{metric}"] = overall_score
103111

src/eval_mm/tasks/cc_ocr.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class CCOCR(Task):
1616
entries labeled as "Japanese" and decodes base64-encoded images into PIL Image
1717
objects for visual processing.
1818
"""
19+
1920
default_metric = "ccocr"
2021

2122
@staticmethod

0 commit comments

Comments
 (0)