@@ -37,40 +37,6 @@ class CVQA(Task):
     default_metric = "substring-match"

     def _prepare_dataset(self) -> Dataset:
-        # Use streaming during tests to ensure we pick N Japanese samples
-        # even if they are sparse early in the split.
-        n = getattr(self.config, "max_dataset_len", None)
-        test_subset = os.getenv("PYTEST_CURRENT_TEST") or os.getenv("EVAL_MM_TEST_SUBSET") == "1"
-        if n is not None and test_subset:
-            stream = load_dataset("afaji/cvqa", split="test", streaming=True)
-            buf = {
-                "index": [],
-                "question_id": [],
-                "question": [],
-                "question_en": [],
-                "options": [],
-                "translated_options": [],
-                "answer": [],
-                "answer_text": [],
-                "image": [],
-            }
-            count = 0
-            for ex in stream:
-                if ex.get("Subset") == "('Japanese', 'Japan')":
-                    buf["index"].append(str(count))
-                    buf["question_id"].append(str(count))
-                    buf["question"].append(ex["Question"])
-                    buf["question_en"].append(ex.get("Translated Question"))
-                    buf["options"].append(ex["Options"])
-                    buf["translated_options"].append(ex.get("Translated Options"))
-                    buf["answer"].append(ex["Label"])  # 0~3
-                    buf["answer_text"].append(OPTIONS_MAP[ex["Label"]])
-                    buf["image"].append(ex["image"])  # keep original to lazily decode later
-                    count += 1
-                    if count >= n:
-                        break
-            return Dataset.from_dict(buf)
-
         ds = load_dataset("afaji/cvqa", split="test")
         ds = ds.filter(lambda x: x["Subset"] == "('Japanese', 'Japan')")
         ds = ds.map(
@@ -89,6 +55,38 @@ def _prepare_dataset(self) -> Dataset:
         )
         return ds

+    def _prepare_test_dataset(self) -> Dataset:
+        # Stream to pick the first N Japanese samples and build a tiny Dataset
+        n = getattr(self.config, "max_dataset_len", 10)
+        stream = load_dataset("afaji/cvqa", split="test", streaming=True)
+        buf = {
+            "index": [],
+            "question_id": [],
+            "question": [],
+            "question_en": [],
+            "options": [],
+            "translated_options": [],
+            "answer": [],
+            "answer_text": [],
+            "image": [],
+        }
+        count = 0
+        for ex in stream:
+            if ex.get("Subset") == "('Japanese', 'Japan')":
+                buf["index"].append(str(count))
+                buf["question_id"].append(str(count))
+                buf["question"].append(ex["Question"])
+                buf["question_en"].append(ex.get("Translated Question"))
+                buf["options"].append(ex["Options"])
+                buf["translated_options"].append(ex.get("Translated Options"))
+                buf["answer"].append(ex["Label"])  # 0~3
+                buf["answer_text"].append(OPTIONS_MAP[ex["Label"]])
+                buf["image"].append(ex["image"])  # keep original to lazily decode later
+                count += 1
+                if count >= n:
+                    break
+        return Dataset.from_dict(buf)
+
     @staticmethod
     def doc_to_text(doc) -> str:
         # Lazily construct the prompt to reduce preprocessing cost
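
As a rough illustration of how the new helper could be exercised, here is a minimal pytest-style sketch. The import path, the CVQA(config) constructor, and the _DummyConfig class are assumptions made for the example; only the max_dataset_len attribute read via getattr() and the column names come from the diff above.

# Hypothetical usage sketch (not part of this commit). Assumes CVQA takes a
# config object and that the import path below is where the task lives.
from dataclasses import dataclass

from eval_mm.tasks.cvqa import CVQA  # assumed import path


@dataclass
class _DummyConfig:
    # Only this attribute is read (via getattr) by _prepare_test_dataset.
    max_dataset_len: int = 5


def test_prepare_test_dataset_caps_japanese_samples():
    # Streams afaji/cvqa and stops after 5 Japanese samples, so the full
    # test split is never downloaded.
    ds = CVQA(_DummyConfig())._prepare_test_dataset()
    assert len(ds) <= 5
    assert {"question", "options", "answer", "answer_text"} <= set(ds.column_names)

Because the helper streams the split and breaks as soon as the cap is reached, a test run only fetches the first few matching examples instead of materializing the whole test split.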