Skip to content

Commit bd62167

Browse files
committed
fix
1 parent 209e814 commit bd62167

File tree

1 file changed

+29
-27
lines changed

1 file changed

+29
-27
lines changed

scripts/make_leaderboard.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,33 @@
3333
"mmmu": "Acc",
3434
}
3535

36+
MODEL_LIST = [
37+
"stabilityai/japanese-instructblip-alpha",
38+
"stabilityai/japanese-stable-vlm",
39+
"SakanaAI/Llama-3-EvoVLM-JP-v2",
40+
"cyberagent/llava-calm2-siglip",
41+
"llm-jp/llm-jp-3-vila-14b",
42+
"sbintuitions/sarashina2-vision-8b",
43+
"sbintuitions/sarashina2-vision-14b",
44+
"MIL-UT/Asagi-14B",
45+
"llava-hf/llava-1.5-7b-hf",
46+
"llava-hf/llava-v1.6-mistral-7b-hf",
47+
"neulab/Pangea-7B-hf",
48+
"mistralai/Pixtral-12B-2409",
49+
"meta-llama/Llama-3.2-11B-Vision-Instruct",
50+
"Efficient-Large-Model/VILA1.5-13b",
51+
"OpenGVLab/InternVL2-8B",
52+
"OpenGVLab/InternVL2-26B",
53+
"Qwen/Qwen2.5-VL-7B-Instruct",
54+
"Qwen/Qwen2.5-VL-32B-Instruct",
55+
"Qwen/Qwen2.5-VL-72B-Instruct",
56+
"google/gemma-3-4b-it",
57+
"google/gemma-3-12b-it",
58+
"google/gemma-3-27b-it",
59+
"microsoft/Phi-4-multimodal-instruct",
60+
"gpt-4o-2024-11-20",
61+
]
62+
3663

3764
def load_evaluation_data(result_dir: str, model: str, task_dirs: List[str]) -> dict:
3865
"""Load evaluation results for a given model across multiple tasks."""
@@ -266,36 +293,11 @@ def parse_args():
266293

267294
if __name__ == "__main__":
268295
args = parse_args()
269-
model_list = [
270-
"stabilityai/japanese-instructblip-alpha",
271-
"stabilityai/japanese-stable-vlm",
272-
"SakanaAI/Llama-3-EvoVLM-JP-v2",
273-
"cyberagent/llava-calm2-siglip",
274-
"llm-jp/llm-jp-3-vila-14b",
275-
"sbintuitions/sarashina2-vision-8b",
276-
"sbintuitions/sarashina2-vision-14b",
277-
"MIL-UT/Asagi-14B",
278-
"llava-hf/llava-1.5-7b-hf",
279-
"llava-hf/llava-v1.6-mistral-7b-hf",
280-
"neulab/Pangea-7B-hf",
281-
"mistralai/Pixtral-12B-2409",
282-
"meta-llama/Llama-3.2-11B-Vision-Instruct",
283-
"Efficient-Large-Model/VILA1.5-13b",
284-
"OpenGVLab/InternVL2-8B",
285-
"OpenGVLab/InternVL2-26B",
286-
"Qwen/Qwen2.5-VL-7B-Instruct",
287-
"Qwen/Qwen2.5-VL-32B-Instruct",
288-
"Qwen/Qwen2.5-VL-72B-Instruct",
289-
"google/gemma-3-4b-it",
290-
"google/gemma-3-12b-it",
291-
"google/gemma-3-27b-it",
292-
"microsoft/Phi-4-multimodal-instruct",
293-
"gpt-4o-2024-11-20",
294-
]
296+
295297
print(args.task_id_list)
296298
main(
297299
args.result_dir,
298-
model_list,
300+
MODEL_LIST,
299301
args.output_path,
300302
args.output_format,
301303
args.plot_bar,

0 commit comments

Comments
 (0)