@@ -23,9 +23,25 @@ def _prepare_dataset(self) -> Dataset:
2323 return ds
2424
def _prepare_test_dataset(self) -> Dataset:
    """Build a small in-memory subset of the TextVQA validation split.

    The split is opened in streaming mode and only the first
    ``max_dataset_len`` examples are consumed, so the full dataset is not
    downloaded or cached (intended for CI runs).

    Returns:
        A ``Dataset`` with the columns ``question_id`` (cast to ``str``),
        ``question``, ``answers`` and ``image``, holding at most
        ``max_dataset_len`` rows. The limit is read from ``self.config``
        and defaults to 10 when the attribute is missing or ``None``.
        A limit of zero or less yields an empty dataset.
    """
    from itertools import islice

    limit = getattr(self.config, "max_dataset_len", 10)
    if limit is None:
        # NOTE(review): an attribute that exists but is unset would otherwise
        # fail on the integer comparison; reuse the fallback of 10 that is
        # already applied when the attribute is absent — confirm intent.
        limit = 10
    # islice rejects negative stops; clamping also makes 0 mean "no rows"
    # instead of the one row a check-after-append loop would return.
    limit = max(limit, 0)

    # Stream a tiny subset to avoid heavy downloads/cache writes in CI.
    stream = load_dataset("lmms-lab/textvqa", split="validation", streaming=True)

    buf = {
        "question_id": [],
        "question": [],
        "answers": [],
        "image": [],
    }
    for ex in islice(stream, limit):
        buf["question_id"].append(str(ex["question_id"]))
        buf["question"].append(ex["question"])
        buf["answers"].append(ex["answers"])  # presumably list[str]; verify against dataset schema
        buf["image"].append(ex["image"])  # image object is stored as yielded by the stream
    return Dataset.from_dict(buf)
2945
3046 @staticmethod
3147 def doc_to_text (doc ) -> str :
@@ -56,31 +72,17 @@ def doc_to_answer(doc) -> list[str]:
5672
5773
def test_textvqa_task():
    """Basic loader/type checks for TextVQA.

    Loads the task with ``max_dataset_len=10`` and verifies, on the first
    example, that each ``doc_to_*`` helper returns the expected Python type.
    """
    from eval_mm.tasks.task import TaskConfig

    max_len = 10
    task = TextVQA(TaskConfig(max_dataset_len=max_len))
    ds = task.dataset

    # Check the lower bound too, so an empty dataset fails here with a clear
    # message instead of raising IndexError on ds[0] below.
    assert 0 < len(ds) <= max_len, f"expected 1..{max_len} examples, got {len(ds)}"

    ex = ds[0]
    assert isinstance(task.doc_to_text(ex), str)

    vis = task.doc_to_visual(ex)
    assert isinstance(vis, list)
    assert vis, "doc_to_visual returned no images"
    # Check every returned visual, not only the first one.
    assert all(isinstance(img, Image.Image) for img in vis)

    assert isinstance(task.doc_to_id(ex), str)

    answers = task.doc_to_answer(ex)
    assert isinstance(answers, list)
    assert all(isinstance(a, str) for a in answers)
0 commit comments