@@ -26,6 +26,22 @@ class ModelRequestData:
2626)
2727
2828
# Model identifiers of the Ovis2 family. Each entry is registered in
# VLLMModelRegistry with the Ovis2 engine-args/prompt builders and is offered
# as a CLI model choice.
OVIS2_MODELS: tuple[str, ...] = (
    "AIDC-AI/Ovis2-1B",
    "AIDC-AI/Ovis2-2B",
    "AIDC-AI/Ovis2-4B",
    "AIDC-AI/Ovis2-8B",
    "AIDC-AI/Ovis2-16B",
    "AIDC-AI/Ovis2-34B",
)
37+ 
38+ 
# Model identifiers of the Ovis2.5 family. Each entry is registered in
# VLLMModelRegistry with the Ovis2.5 engine-args/prompt builders and is
# offered as a CLI model choice.
OVIS2_5_MODELS: tuple[str, ...] = (
    "AIDC-AI/Ovis2.5-2B",
    "AIDC-AI/Ovis2.5-9B",
)
43+ 
44+ 
2945class  VLLMModelRegistry :
3046    def  __init__ (self , model_id : str ):
3147        self .model_id  =  model_id 
@@ -66,6 +82,18 @@ def __init__(self, model_id: str):
6682                self ._load_internvl ,
6783            )
6884
85+         for  ovis_model  in  OVIS2_MODELS :
86+             registry [ovis_model ] =  (
87+                 self ._engine_args_ovis2 ,
88+                 self ._load_ovis2 ,
89+             )
90+ 
91+         for  ovis_model  in  OVIS2_5_MODELS :
92+             registry [ovis_model ] =  (
93+                 self ._engine_args_ovis2_5 ,
94+                 self ._load_ovis2_5 ,
95+             )
96+ 
6997        try :
7098            self ._engine_resolver , self ._request_builder  =  registry [model_id ]
7199        except  KeyError  as  exc :  # pragma: no cover - defensive programming 
@@ -277,6 +305,98 @@ def _load_glm4v(
277305
278306        return  ModelRequestData (prompts = prompts , stop_token_ids = stop_token_ids )
279307
308+     def  _engine_args_ovis2 (self ) ->  EngineArgs :
309+         return  EngineArgs (
310+             model = self .model_id ,
311+             max_model_len = 4096 ,
312+             max_num_seqs = 2 ,
313+             trust_remote_code = True ,
314+             dtype = "half" ,
315+             limit_mm_per_prompt = {self .modality : 1 },
316+         )
317+ 
318+     def  _load_ovis2 (
319+         self , texts : list [str ], images_list : list [list [Image .Image ]]
320+     ) ->  ModelRequestData :
321+         if  len (texts ) !=  len (images_list ):
322+             msg  =  "texts and images_list must have identical length" 
323+             raise  ValueError (msg )
324+ 
325+         if  not  hasattr (self , "_ovis_tokenizer" ):
326+             self ._ovis_tokenizer  =  AutoTokenizer .from_pretrained (
327+                 self .model_id ,
328+                 trust_remote_code = True ,
329+             )
330+ 
331+         tokenizer  =  self ._ovis_tokenizer 
332+         messages  =  []
333+         for  text , images  in  zip (texts , images_list ):
334+             num_images  =  len (images )
335+             placeholder_lines  =  "\n " .join ("<image>"  for  _  in  range (num_images ))
336+             if  placeholder_lines  and  text :
337+                 content  =  f"{ placeholder_lines } \n { text }  " 
338+             elif  placeholder_lines :
339+                 content  =  placeholder_lines 
340+             else :
341+                 content  =  text 
342+ 
343+             messages .append ([{"role" : "user" , "content" : content }])
344+ 
345+         prompts  =  tokenizer .apply_chat_template (
346+             messages ,
347+             tokenize = False ,
348+             add_generation_prompt = True ,
349+         )
350+ 
351+         return  ModelRequestData (prompts = prompts )
352+ 
353+     def  _engine_args_ovis2_5 (self ) ->  EngineArgs :
354+         return  EngineArgs (
355+             model = self .model_id ,
356+             max_model_len = 4096 ,
357+             max_num_seqs = 2 ,
358+             trust_remote_code = True ,
359+             dtype = "half" ,
360+             limit_mm_per_prompt = {self .modality : 1 },
361+         )
362+ 
363+     def  _load_ovis2_5 (
364+         self , texts : list [str ], images_list : list [list [Image .Image ]]
365+     ) ->  ModelRequestData :
366+         if  len (texts ) !=  len (images_list ):
367+             msg  =  "texts and images_list must have identical length" 
368+             raise  ValueError (msg )
369+ 
370+         placeholder_map  =  {
371+             "image" : "<image>" ,
372+             "video" : "<video>" ,
373+         }
374+         placeholder  =  placeholder_map .get (self .modality , "<image>" )
375+ 
376+         prompts : list [str ] =  []
377+         for  text , images  in  zip (texts , images_list ):
378+             num_images  =  len (images )
379+             lines : list [str ] =  []
380+             if  num_images  >  0 :
381+                 media_block  =  "\n " .join (placeholder  for  _  in  range (num_images ))
382+                 lines .append (media_block )
383+             if  text :
384+                 lines .append (text )
385+ 
386+             content_block  =  "\n " .join (lines )
387+             if  content_block :
388+                 content_block  =  f"{ content_block } \n " 
389+ 
390+             prompt  =  (
391+                 "<|im_start|>user\n \n " 
392+                 f"{ content_block }  " 
393+                 "<|im_end|>\n " 
394+                 "<|im_start|>assistant\n " 
395+             )
396+             prompts .append (prompt )
397+ 
398+         return  ModelRequestData (prompts = prompts )
399+ 
280400    def  _engine_args_minicpm_o (self ) ->  EngineArgs :
281401        return  EngineArgs (
282402            model = self .model_id ,
@@ -413,6 +533,34 @@ def preview_glm4v_requests(
413533    return  registry .build_requests (texts , images_list )
414534
415535
def preview_ovis2_requests(
    texts: list[str], image_counts: list[int]
) -> ModelRequestData:
    """Preview Ovis2 request data built from dummy images (testing helper).

    For every entry of ``image_counts`` a batch of images is obtained from
    ``_generate_dummy_images``; the batches and ``texts`` are then passed to
    ``build_requests`` of a ``VLLMModelRegistry`` created for
    ``"AIDC-AI/Ovis2-8B"``.

    Raises:
        ValueError: If ``texts`` and ``image_counts`` differ in length.
    """
    if len(image_counts) != len(texts):
        raise ValueError("texts and image_counts must have identical length")

    dummy_batches = list(map(_generate_dummy_images, image_counts))
    ovis2_registry = VLLMModelRegistry("AIDC-AI/Ovis2-8B")
    return ovis2_registry.build_requests(texts, dummy_batches)
548+ 
549+ 
def preview_ovis2_5_requests(
    texts: list[str], image_counts: list[int]
) -> ModelRequestData:
    """Preview Ovis2.5 request data built from dummy images (testing helper).

    For every entry of ``image_counts`` a batch of images is obtained from
    ``_generate_dummy_images``; the batches and ``texts`` are then passed to
    ``build_requests`` of a ``VLLMModelRegistry`` created for
    ``"AIDC-AI/Ovis2.5-9B"``.

    Raises:
        ValueError: If ``texts`` and ``image_counts`` differ in length.
    """
    if len(image_counts) != len(texts):
        raise ValueError("texts and image_counts must have identical length")

    dummy_batches = list(map(_generate_dummy_images, image_counts))
    ovis2_5_registry = VLLMModelRegistry("AIDC-AI/Ovis2.5-9B")
    return ovis2_5_registry.build_requests(texts, dummy_batches)
562+ 
563+ 
416564def  preview_minicpm_o_requests (
417565    texts : list [str ], image_counts : list [int ]
418566) ->  ModelRequestData :
@@ -439,6 +587,8 @@ def _parse_cli_args() -> argparse.Namespace:
439587            "moonshotai/Kimi-VL-A3B-Instruct" ,
440588            "deepseek-ai/deepseek-vl2" ,
441589            "zai-org/glm-4v-9b" ,
590+             * OVIS2_MODELS ,
591+             * OVIS2_5_MODELS ,
442592            "openbmb/MiniCPM-o-2_6" ,
443593            * INTERNVL_MODELS ,
444594        ],
@@ -498,6 +648,12 @@ def _preview_cli() -> None:
498648    for  internvl_model  in  INTERNVL_MODELS :
499649        preview_dispatch [internvl_model ] =  preview_internvl_requests 
500650
651+     for  ovis_model  in  OVIS2_MODELS :
652+         preview_dispatch [ovis_model ] =  preview_ovis2_requests 
653+ 
654+     for  ovis_model  in  OVIS2_5_MODELS :
655+         preview_dispatch [ovis_model ] =  preview_ovis2_5_requests 
656+ 
501657    preview_fn  =  preview_dispatch [args .model_id ]
502658    registry  =  VLLMModelRegistry (args .model_id )
503659    request_data  =  preview_fn (texts , image_counts )
0 commit comments