|
33 | 33 | "mmmu": "Acc", |
34 | 34 | } |
35 | 35 |
|
| 36 | +MODEL_LIST = [ |
| 37 | + "stabilityai/japanese-instructblip-alpha", |
| 38 | + "stabilityai/japanese-stable-vlm", |
| 39 | + "SakanaAI/Llama-3-EvoVLM-JP-v2", |
| 40 | + "cyberagent/llava-calm2-siglip", |
| 41 | + "llm-jp/llm-jp-3-vila-14b", |
| 42 | + "sbintuitions/sarashina2-vision-8b", |
| 43 | + "sbintuitions/sarashina2-vision-14b", |
| 44 | + "MIL-UT/Asagi-14B", |
| 45 | + "llava-hf/llava-1.5-7b-hf", |
| 46 | + "llava-hf/llava-v1.6-mistral-7b-hf", |
| 47 | + "neulab/Pangea-7B-hf", |
| 48 | + "mistralai/Pixtral-12B-2409", |
| 49 | + "meta-llama/Llama-3.2-11B-Vision-Instruct", |
| 50 | + "Efficient-Large-Model/VILA1.5-13b", |
| 51 | + "OpenGVLab/InternVL2-8B", |
| 52 | + "OpenGVLab/InternVL2-26B", |
| 53 | + "Qwen/Qwen2.5-VL-7B-Instruct", |
| 54 | + "Qwen/Qwen2.5-VL-32B-Instruct", |
| 55 | + "Qwen/Qwen2.5-VL-72B-Instruct", |
| 56 | + "google/gemma-3-4b-it", |
| 57 | + "google/gemma-3-12b-it", |
| 58 | + "google/gemma-3-27b-it", |
| 59 | + "microsoft/Phi-4-multimodal-instruct", |
| 60 | + "gpt-4o-2024-11-20", |
| 61 | +] |
| 62 | + |
36 | 63 |
|
37 | 64 | def load_evaluation_data(result_dir: str, model: str, task_dirs: List[str]) -> dict: |
38 | 65 | """Load evaluation results for a given model across multiple tasks.""" |
@@ -266,36 +293,11 @@ def parse_args(): |
266 | 293 |
|
267 | 294 | if __name__ == "__main__": |
268 | 295 | args = parse_args() |
269 | | - model_list = [ |
270 | | - "stabilityai/japanese-instructblip-alpha", |
271 | | - "stabilityai/japanese-stable-vlm", |
272 | | - "SakanaAI/Llama-3-EvoVLM-JP-v2", |
273 | | - "cyberagent/llava-calm2-siglip", |
274 | | - "llm-jp/llm-jp-3-vila-14b", |
275 | | - "sbintuitions/sarashina2-vision-8b", |
276 | | - "sbintuitions/sarashina2-vision-14b", |
277 | | - "MIL-UT/Asagi-14B", |
278 | | - "llava-hf/llava-1.5-7b-hf", |
279 | | - "llava-hf/llava-v1.6-mistral-7b-hf", |
280 | | - "neulab/Pangea-7B-hf", |
281 | | - "mistralai/Pixtral-12B-2409", |
282 | | - "meta-llama/Llama-3.2-11B-Vision-Instruct", |
283 | | - "Efficient-Large-Model/VILA1.5-13b", |
284 | | - "OpenGVLab/InternVL2-8B", |
285 | | - "OpenGVLab/InternVL2-26B", |
286 | | - "Qwen/Qwen2.5-VL-7B-Instruct", |
287 | | - "Qwen/Qwen2.5-VL-32B-Instruct", |
288 | | - "Qwen/Qwen2.5-VL-72B-Instruct", |
289 | | - "google/gemma-3-4b-it", |
290 | | - "google/gemma-3-12b-it", |
291 | | - "google/gemma-3-27b-it", |
292 | | - "microsoft/Phi-4-multimodal-instruct", |
293 | | - "gpt-4o-2024-11-20", |
294 | | - ] |
| 296 | + |
295 | 297 | print(args.task_id_list) |
296 | 298 | main( |
297 | 299 | args.result_dir, |
298 | | - model_list, |
| 300 | + MODEL_LIST, |
299 | 301 | args.output_path, |
300 | 302 | args.output_format, |
301 | 303 | args.plot_bar, |
|
0 commit comments