|
4 | 4 | "url": "https://huggingface.co/stabilityai/japanese-instructblip-alpha", |
5 | 5 | "scores": { |
6 | 6 | "CC-OCR": { |
7 | | - "macro_f1": 0.0 |
| 7 | + "macro_f1": 0.3 |
8 | 8 | }, |
9 | 9 | "CVQA": { |
10 | 10 | "Acc": 0.24 |
|
92 | 92 | "url": "https://huggingface.co/SakanaAI/Llama-3-EvoVLM-JP-v2", |
93 | 93 | "scores": { |
94 | 94 | "CC-OCR": { |
95 | | - "macro_f1": 0.1 |
| 95 | + "macro_f1": 10.44 |
96 | 96 | }, |
97 | 97 | "CVQA": { |
98 | 98 | "Acc": 0.44 |
|
139 | 139 | "url": "https://huggingface.co/cyberagent/llava-calm2-siglip", |
140 | 140 | "scores": { |
141 | 141 | "CC-OCR": { |
142 | | - "macro_f1": 0.11 |
| 142 | + "macro_f1": 11.37 |
143 | 143 | }, |
144 | 144 | "CVQA": { |
145 | 145 | "Acc": 0.07 |
|
186 | 186 | "url": "https://huggingface.co/llm-jp/llm-jp-3-vila-14b", |
187 | 187 | "scores": { |
188 | 188 | "CC-OCR": { |
189 | | - "macro_f1": 0.12 |
| 189 | + "macro_f1": 11.55 |
190 | 190 | }, |
191 | 191 | "CVQA": { |
192 | 192 | "Acc": 0.3 |
|
233 | 233 | "url": "https://huggingface.co/sbintuitions/sarashina2-vision-8b", |
234 | 234 | "scores": { |
235 | 235 | "CC-OCR": { |
236 | | - "macro_f1": 0.08 |
| 236 | + "macro_f1": 8.2 |
237 | 237 | }, |
238 | 238 | "CVQA": { |
239 | 239 | "Acc": 0.49 |
|
280 | 280 | "url": "https://huggingface.co/sbintuitions/sarashina2-vision-14b", |
281 | 281 | "scores": { |
282 | 282 | "CC-OCR": { |
283 | | - "macro_f1": 0.23 |
| 283 | + "macro_f1": 23.23 |
284 | 284 | }, |
285 | 285 | "CVQA": { |
286 | 286 | "Acc": 0.56 |
|
365 | 365 | "url": "https://huggingface.co/llava-hf/llava-1.5-7b-hf", |
366 | 366 | "scores": { |
367 | 367 | "CC-OCR": { |
368 | | - "macro_f1": 0.14 |
| 368 | + "macro_f1": 14.46 |
369 | 369 | }, |
370 | 370 | "CVQA": { |
371 | 371 | "Acc": 0.41 |
|
412 | 412 | "url": "https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf", |
413 | 413 | "scores": { |
414 | 414 | "CC-OCR": { |
415 | | - "macro_f1": 0.2 |
| 415 | + "macro_f1": 20.12 |
416 | 416 | }, |
417 | 417 | "CVQA": { |
418 | 418 | "Acc": 0.27 |
|
459 | 459 | "url": "https://huggingface.co/neulab/Pangea-7B-hf", |
460 | 460 | "scores": { |
461 | 461 | "CC-OCR": { |
462 | | - "macro_f1": 0.19 |
| 462 | + "macro_f1": 18.74 |
463 | 463 | }, |
464 | 464 | "CVQA": { |
465 | 465 | "Acc": 0.48 |
|
506 | 506 | "url": "https://huggingface.co/mistralai/Pixtral-12B-2409", |
507 | 507 | "scores": { |
508 | 508 | "CC-OCR": { |
509 | | - "macro_f1": 0.27 |
| 509 | + "macro_f1": 27.17 |
510 | 510 | }, |
511 | 511 | "CVQA": { |
512 | 512 | "Acc": 0.49 |
|
553 | 553 | "url": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct", |
554 | 554 | "scores": { |
555 | 555 | "CC-OCR": { |
556 | | - "macro_f1": 0.28 |
| 556 | + "macro_f1": 27.71 |
557 | 557 | }, |
558 | 558 | "CVQA": { |
559 | 559 | "Acc": 0.52 |
|
600 | 600 | "url": "https://huggingface.co/Efficient-Large-Model/VILA1.5-13b", |
601 | 601 | "scores": { |
602 | 602 | "CC-OCR": { |
603 | | - "macro_f1": 0.21 |
| 603 | + "macro_f1": 20.56 |
604 | 604 | }, |
605 | 605 | "CVQA": { |
606 | 606 | "Acc": 0.44 |
|
647 | 647 | "url": "https://huggingface.co/OpenGVLab/InternVL2-8B", |
648 | 648 | "scores": { |
649 | 649 | "CC-OCR": { |
650 | | - "macro_f1": 0.37 |
| 650 | + "macro_f1": 37.33 |
651 | 651 | }, |
652 | 652 | "CVQA": { |
653 | 653 | "Acc": 0.48 |
|
694 | 694 | "url": "https://huggingface.co/OpenGVLab/InternVL2-26B", |
695 | 695 | "scores": { |
696 | 696 | "CC-OCR": { |
697 | | - "macro_f1": 0.39 |
| 697 | + "macro_f1": 38.51 |
698 | 698 | }, |
699 | 699 | "CVQA": { |
700 | 700 | "Acc": 0.5 |
|
741 | 741 | "url": "https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct", |
742 | 742 | "scores": { |
743 | 743 | "CC-OCR": { |
744 | | - "macro_f1": 0.75 |
| 744 | + "macro_f1": 74.77 |
745 | 745 | }, |
746 | 746 | "CVQA": { |
747 | 747 | "Acc": 0.56 |
|
829 | 829 | "url": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct", |
830 | 830 | "scores": { |
831 | 831 | "CC-OCR": { |
832 | | - "macro_f1": 0.77 |
| 832 | + "macro_f1": 77.14 |
833 | 833 | }, |
834 | 834 | "CVQA": { |
835 | 835 | "Acc": 0.69 |
|
876 | 876 | "url": "https://huggingface.co/google/gemma-3-4b-it", |
877 | 877 | "scores": { |
878 | 878 | "CC-OCR": { |
879 | | - "macro_f1": 0.55 |
| 879 | + "macro_f1": 55.49 |
880 | 880 | }, |
881 | 881 | "CVQA": { |
882 | 882 | "Acc": 0.49 |
|
923 | 923 | "url": "https://huggingface.co/google/gemma-3-12b-it", |
924 | 924 | "scores": { |
925 | 925 | "CC-OCR": { |
926 | | - "macro_f1": 0.65 |
| 926 | + "macro_f1": 64.92 |
927 | 927 | }, |
928 | 928 | "CVQA": { |
929 | 929 | "Acc": 0.59 |
|
970 | 970 | "url": "https://huggingface.co/google/gemma-3-27b-it", |
971 | 971 | "scores": { |
972 | 972 | "CC-OCR": { |
973 | | - "macro_f1": 0.67 |
| 973 | + "macro_f1": 66.92 |
974 | 974 | }, |
975 | 975 | "CVQA": { |
976 | 976 | "Acc": 0.63 |
|
1017 | 1017 | "url": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct", |
1018 | 1018 | "scores": { |
1019 | 1019 | "CC-OCR": { |
1020 | | - "macro_f1": 0.42 |
| 1020 | + "macro_f1": 42.48 |
1021 | 1021 | }, |
1022 | 1022 | "CVQA": { |
1023 | 1023 | "Acc": 0.37 |
|
1064 | 1064 | "url": "https://huggingface.co/gpt-4o-2024-11-20", |
1065 | 1065 | "scores": { |
1066 | 1066 | "CC-OCR": { |
1067 | | - "macro_f1": 0.64 |
| 1067 | + "macro_f1": 64.11 |
1068 | 1068 | }, |
1069 | 1069 | "CVQA": { |
1070 | 1070 | "Acc": 0.82 |
|
1111 | 1111 | "url": "https://huggingface.co/turing-motors/Heron-NVILA-Lite-15B", |
1112 | 1112 | "scores": { |
1113 | 1113 | "CC-OCR": { |
1114 | | - "macro_f1": 0.45 |
| 1114 | + "macro_f1": 44.54 |
1115 | 1115 | }, |
1116 | 1116 | "CVQA": { |
1117 | 1117 | "Acc": 0.61 |
|
0 commit comments