
Commit f0998c3

Merge pull request #164 from llm-jp/165-feat-vllm
Support for inference with vLLM
2 parents c09c074 + 8ad9679 · commit f0998c3

File tree

7 files changed: +497 −5 lines

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -139,3 +139,6 @@ uv.lock
 
 # vscode
 .vscode/
+
+# cache
+.cache/

eval_with_vllm.sh

Lines changed: 59 additions & 0 deletions

#!/bin/bash
set -eux  # Stop the script as soon as any command fails

# Set CUDA devices
#export CUDA_VISIBLE_DEVICES=0

# Model name to group name mapping
declare -A MODEL_GROUP_MAP=(
    ["Qwen/Qwen2.5-VL-3B-Instruct"]="normal"
    ["Qwen/Qwen2.5-VL-7B-Instruct"]="normal"
    ["Qwen/Qwen2.5-VL-32B-Instruct"]="normal"
    # ["Qwen/Qwen2.5-VL-72B-Instruct"]="normal"
    ["google/gemma-3-4b-it"]="normal"
    ["google/gemma-3-12b-it"]="normal"
    ["google/gemma-3-27b-it"]="normal"
)

# Task list
declare -a task_list=(
    "japanese-heron-bench"
)

# Define metrics per task
declare -A METRIC_MAP=(
    ["japanese-heron-bench"]="heron-bench"
    ["ja-vlm-bench-in-the-wild"]="llm-as-a-judge,rougel"
    ["ja-vg-vqa-500"]="llm-as-a-judge,rougel"
    ["jmmmu"]="jmmmu"
    ["ja-multi-image-vqa"]="llm-as-a-judge,rougel"
    ["jdocqa"]="jdocqa,llm-as-a-judge"
    ["mmmu"]="mmmu"
    ["llava-bench-in-the-wild"]="llm-as-a-judge,rougel"
    ["jic-vqa"]="jic-vqa"
    ["mecha-ja"]="mecha-ja"
)

# Result directories
declare -a result_dir_list=(
    "result"
)

# Main evaluation loop
for RESULT_DIR in "${result_dir_list[@]}"; do
    for task in "${task_list[@]}"; do
        METRIC=${METRIC_MAP[$task]}
        for model_name in "${!MODEL_GROUP_MAP[@]}"; do
            # All models currently map to the "normal" group, so the
            # vllm_normal dependency group is used directly below.
            model_group=${MODEL_GROUP_MAP[$model_name]}
            uv sync --group vllm_normal
            uv run --group vllm_normal python examples/sample_vllm.py \
                --model_id "$model_name" \
                --task_id "$task" \
                --metrics "$METRIC" \
                --judge_model "gpt-4o-2024-11-20" \
                --result_dir "$RESULT_DIR" \
                --inference_only
        done
    done
done

echo "All evaluations are done."

examples/base_vllm.py

Lines changed: 91 additions & 0 deletions

from vllm import LLM, SamplingParams
from PIL import Image
from tqdm import tqdm
from utils import GenerationConfig
from base_vlm import BaseVLM
from vllm_registry import VLLMModelRegistry


class VLLM(BaseVLM):
    def __init__(self, model_id: str = "google/gemma-3-4b-it") -> None:
        self.model_id = model_id
        self.registry = VLLMModelRegistry(self.model_id)
        self.processor = self.registry.processor
        self.vllm_loader = self.registry.loader_map[self.model_id]

        engine_config = self.registry.get_engine_config(self.model_id)
        self.engine_args_dict = {
            "model": self.model_id,
            # Number of GPUs to shard across; the model's attention head
            # count (40 here) must be divisible by tensor_parallel_size.
            "tensor_parallel_size": 2,
            "download_dir": "./.cache/vllm",
            **engine_config,
        }
        self.model = LLM(**self.engine_args_dict)

    def generate(
        self,
        images: list[Image.Image] | None,
        text: str,
        gen_kwargs: GenerationConfig = GenerationConfig(),
    ) -> str:
        if images is None:
            images = []
        req_data = self.vllm_loader(text, images)
        sampling_params = SamplingParams(
            temperature=gen_kwargs.temperature,
            max_tokens=gen_kwargs.max_new_tokens,
            stop_token_ids=req_data.stop_token_ids,
        )
        outputs = self.model.generate(
            {
                "prompt": req_data.prompt,
                "multi_modal_data": {"image": req_data.image_data},
            },
            sampling_params=sampling_params,
            lora_request=req_data.lora_requests,
        )
        return outputs[0].outputs[0].text

    def batch_generate(
        self,
        images_list: list[list[Image.Image]] | None,
        text_list: list[str],
        gen_kwargs: GenerationConfig = GenerationConfig(),
    ) -> list[str]:
        if images_list is None:
            images_list = [[] for _ in range(len(text_list))]

        assert len(images_list) == len(text_list)

        req_data_list = [
            self.vllm_loader(text, images)
            for text, images in tqdm(zip(text_list, images_list))
        ]

        sampling_params = SamplingParams(
            temperature=gen_kwargs.temperature,
            max_tokens=gen_kwargs.max_new_tokens,
        )

        print(f"Prepared {len(req_data_list)} requests")

        outputs = self.model.generate(
            [
                {
                    "prompt": req_data.prompt,
                    "multi_modal_data": {"image": req_data.image_data},
                }
                for req_data in req_data_list
            ],
            sampling_params=sampling_params,
        )
        return [output.outputs[0].text for output in outputs]


if __name__ == "__main__":
    print("=== Qwen/Qwen2.5-VL-3B-Instruct ===")
    vllm = VLLM("Qwen/Qwen2.5-VL-3B-Instruct")
    vllm.test_vlm()
    vllm.test_vlm_batch_100()
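
base_vllm.py leans on vllm_registry.py (also changed in this commit, but not shown here) for per-model prompt construction. Judging only from the attributes read above (req_data.prompt, req_data.image_data, req_data.stop_token_ids, req_data.lora_requests), each loader plausibly returns something shaped like the following; this dataclass is an assumption for illustration, not the repository's actual definition.

# Hypothetical shape of the per-request object returned by each loader in
# VLLMModelRegistry, inferred from the attributes base_vllm.py reads above.
# vllm_registry.py itself is not shown in this excerpt.
from dataclasses import dataclass
from typing import Any

from PIL import Image


@dataclass
class RequestData:
    prompt: str                              # chat-templated prompt with image placeholders
    image_data: list[Image.Image]            # passed to vLLM via multi_modal_data
    stop_token_ids: list[int] | None = None  # forwarded to SamplingParams
    lora_requests: Any = None                # optional LoRA request for the engine, if any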

examples/base_vlm.py

Lines changed: 56 additions & 5 deletions

@@ -17,6 +17,15 @@ def generate(
         """Generate a response given an image (or list of images) and a prompt."""
         raise NotImplementedError
 
+    def batch_generate(
+        self,
+        images_list: list[list[Image.Image]] | None,
+        text_list: list[str],
+        gen_kwargs: GenerationConfig = GenerationConfig(),
+    ) -> list[str]:
+        """Generate responses given a list of image lists and a list of prompts."""
+        raise NotImplementedError
+
     def test_vlm(self):
         """Test the model with one or two images."""
         image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
@@ -35,8 +44,50 @@ def test_vlm(self):
             output, str
         ), f"Expected output to be a string, but got {type(output)}"
 
-        output = self.generate([], "画像には何が映っていますか?")
-        logger.info(f"Output: {output}")
-        assert isinstance(
-            output, str
-        ), f"Expected output to be a string, but got {type(output)}"
+        # --- No-image case (currently disabled) ---
+        # output = self.generate([], "画像には何が映っていますか?")
+        # logger.info(f"Output: {output}")
+        # assert isinstance(
+        #     output, str
+        # ), f"Expected output to be a string, but got {type(output)}"
+
+    def test_vlm_100(self):
+        """Time 100 sequential generate() calls on a single image."""
+        image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(image_file, stream=True).raw)
+
+        import time
+
+        start_time = time.time()
+        for _ in range(100):
+            output = self.generate([image], "画像には何が映っていますか?")
+            logger.info(f"Output: {output}")
+            assert isinstance(
+                output, str
+            ), f"Expected output to be a string, but got {type(output)}"
+        end_time = time.time()
+        logger.info(f"Time taken: {end_time - start_time} seconds for 100 calls")
+
+    def test_vlm_batch_100(self):
+        """Time one batch_generate() call over 100 identical requests."""
+        print("=== Batch 100 test ===")
+        print(f"Model: {self.model_id}")
+
+        image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(image_file, stream=True).raw)
+
+        import time
+
+        image_list = [[image] for _ in range(100)]
+        # batch_generate expects list[str], so pass plain strings here rather
+        # than single-element lists.
+        text_list = ["画像には何が映っていますか?" for _ in range(100)]
+
+        start_time = time.time()
+        outputs = self.batch_generate(image_list, text_list)
+        for output in outputs:
+            assert isinstance(
+                output, str
+            ), f"Expected output to be a string, but got {type(output)}"
+
+        end_time = time.time()
+        logger.info(f"Time taken: {end_time - start_time} seconds for a batch of 100")
