
Adds multimodal support and MMMU pro #675


Merged: 43 commits, May 19, 2025
Commits (43, changes shown from all commits):
409b0c0 - init (NathanHB, Apr 15, 2025)
ee334c5 - init (NathanHB, Apr 15, 2025)
e988f6f - init (NathanHB, Apr 15, 2025)
5fddc82 - Naive implementation (qubvel, Apr 21, 2025)
7ce9c97 - Fix choices + change metric (qubvel, Apr 22, 2025)
e08731a - refactor prompt function (qubvel, Apr 22, 2025)
8d4543b - style (qubvel, Apr 22, 2025)
05df4b6 - FIx typing (qubvel, May 6, 2025)
16a9e97 - Merge branch 'main' into nathan-adds-multimodal (qubvel, May 6, 2025)
de60add - Update max length (qubvel, May 6, 2025)
5fd52f5 - Remove docs (qubvel, May 6, 2025)
10b4e0b - Update auto processor (qubvel, May 6, 2025)
bc7610d - add quantization config, transformers config (qubvel, May 6, 2025)
49e4986 - Update generation size (qubvel, May 7, 2025)
75c900c - Add batching (qubvel, May 7, 2025)
4e5fdd3 - Style (qubvel, May 7, 2025)
d1ae8b7 - Add images to requests (qubvel, May 7, 2025)
f855158 - nit (qubvel, May 7, 2025)
641819e - nit (qubvel, May 7, 2025)
aa0acb7 - Clean up a bit (qubvel, May 7, 2025)
56f962b - nit (qubvel, May 7, 2025)
8e99388 - Fix batch size (qubvel, May 7, 2025)
418840d - Add images for Doc class (qubvel, May 7, 2025)
e35db98 - clean-up prompt manager (qubvel, May 7, 2025)
57c18f7 - Style (qubvel, May 7, 2025)
7cd35c2 - Style (qubvel, May 7, 2025)
e13cac9 - Clean up prompt manager (qubvel, May 7, 2025)
fa18ec2 - Add dtype (qubvel, May 7, 2025)
c59e5af - Update prompt function (qubvel, May 7, 2025)
8f31f1b - Refactor to pass ruff check (qubvel, May 7, 2025)
3675066 - fix the CI (NathanHB, May 12, 2025)
30e22ab - fix the CI (NathanHB, May 12, 2025)
924bf13 - Fit typing (qubvel, May 12, 2025)
b909259 - Fix system content (qubvel, May 12, 2025)
665474a - Split to vision and standard tasks (qubvel, May 13, 2025)
1a73dd0 - Data parallel (qubvel, May 13, 2025)
b618af7 - Clean up config docs, tokenizer -> processor (qubvel, May 13, 2025)
79e222d - Add fast image processor option (qubvel, May 13, 2025)
bd2c595 - Fix style (qubvel, May 13, 2025)
831f95e - commit (NathanHB, May 19, 2025)
80568e7 - commit (NathanHB, May 19, 2025)
9fb75a6 - commit (NathanHB, May 19, 2025)
62165a8 - commit (NathanHB, May 19, 2025)
10 changes: 10 additions & 0 deletions examples/model_configs/transformers_vlm_model.yaml
@@ -0,0 +1,10 @@
+model_parameters:
+  model_name: "Qwen/Qwen2.5-VL-3B-Instruct"
+  revision: "main"
+  dtype: "float16"
+  compile: false
+  model_parallel: false
+  batch_size: 1
+  generation_parameters:
+    temperature: 0.2
+    top_p: 0.9
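
For reference, a minimal sketch of how this example config could be consumed from Python rather than the CLI. Only VLMTransformersModelConfig and the YAML layout come from this PR; reading the file with pyyaml and unpacking model_parameters straight into the constructor is an assumption mirroring how main_accelerate.py builds the config, not a documented helper.

# Hypothetical usage sketch for the example config above.
import yaml

from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig

with open("examples/model_configs/transformers_vlm_model.yaml") as f:
    model_parameters = yaml.safe_load(f)["model_parameters"]

# Unpack the parsed mapping into the new VLM config (assumed to accept
# these fields directly, including the nested generation_parameters).
model_config = VLMTransformersModelConfig(**model_parameters)
print(model_config.model_name)  # "Qwen/Qwen2.5-VL-3B-Instruct"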
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -54,7 +54,7 @@ classifiers = [
 keywords = ["evaluation", "nlp", "llm"]
 dependencies = [
     # Base dependencies
-    "transformers>=4.38.0",
+    "transformers>=4.51.0",
     "accelerate",
     "huggingface_hub[hf_xet]>=0.30.2",
     "torch>=2.0,<3.0",
9 changes: 8 additions & 1 deletion src/lighteval/main_accelerate.py
@@ -48,6 +48,9 @@ def accelerate(  # noqa C901
     use_chat_template: Annotated[
         bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = False,
+    vision_model: Annotated[
+        bool, Option(help="Use vision model for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
+    ] = False,
     system_prompt: Annotated[
         Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
@@ -109,6 +112,7 @@ def accelerate(  # noqa C901
     from lighteval.models.transformers.adapter_model import AdapterModelConfig
     from lighteval.models.transformers.delta_model import DeltaModelConfig
     from lighteval.models.transformers.transformers_model import TransformersModelConfig
+    from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig
    from lighteval.models.utils import ModelConfig
     from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

@@ -147,7 +151,10 @@ def accelerate(  # noqa C901
     elif config.get("adapter_weights", False):
         model_config = AdapterModelConfig(**config)
     else:
-        model_config = TransformersModelConfig(**config)
+        if vision_model:
+            model_config = VLMTransformersModelConfig(**config)
+        else:
+            model_config = TransformersModelConfig(**config)

     pipeline = Pipeline(
         tasks=tasks,
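
The branch above is the whole vision/text switch: with the new vision_model flag set, the parsed model parameters are unpacked into VLMTransformersModelConfig instead of TransformersModelConfig, and everything downstream is unchanged. Below is a sketch of the equivalent programmatic call, using the Pipeline API imported in this file; the task spec "lighteval|mmmu_pro|0|0", the tracker setup, and the field values are illustrative assumptions, not values confirmed by this diff.

# Sketch: roughly what `lighteval accelerate ... --vision-model` amounts to.
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

# With --vision-model, this config class is chosen instead of TransformersModelConfig.
model_config = VLMTransformersModelConfig(
    model_name="Qwen/Qwen2.5-VL-3B-Instruct",
    dtype="float16",
    batch_size=1,
)
pipeline = Pipeline(
    tasks="lighteval|mmmu_pro|0|0",  # assumed spec for the new MMMU-Pro task
    pipeline_parameters=PipelineParameters(launcher_type=ParallelismManager.ACCELERATE),
    evaluation_tracker=EvaluationTracker(output_dir="./results"),
    model_config=model_config,
)
pipeline.evaluate()
pipeline.show_results()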
29 changes: 13 additions & 16 deletions src/lighteval/models/model_loader.py
@@ -42,6 +42,8 @@
 from lighteval.models.transformers.adapter_model import AdapterModel, AdapterModelConfig
 from lighteval.models.transformers.delta_model import DeltaModel, DeltaModelConfig
 from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig
+from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModel, VLMTransformersModelConfig
+from lighteval.models.utils import ModelConfig
 from lighteval.models.vllm.vllm_model import VLLMModel, VLLMModelConfig
 from lighteval.utils.imports import (
     NO_LITELLM_ERROR_MSG,
@@ -60,21 +62,8 @@


 def load_model(  # noqa: C901
-    config: Union[
-        TransformersModelConfig,
-        AdapterModelConfig,
-        DeltaModelConfig,
-        TGIModelConfig,
-        InferenceEndpointModelConfig,
-        DummyModelConfig,
-        VLLMModelConfig,
-        CustomModelConfig,
-        OpenAIModelConfig,
-        LiteLLMModelConfig,
-        SGLangModelConfig,
-        InferenceProvidersModelConfig,
-    ],
-) -> Union[TransformersModel, AdapterModel, DeltaModel, ModelClient, DummyModel]:
+    config: ModelConfig,
+) -> LightevalModel:
     """Will load either a model from an inference server or a model from a checkpoint, depending
     on the config type.
@@ -100,6 +89,9 @@ def load_model(  # noqa: C901
     if isinstance(config, TransformersModelConfig):
         return load_model_with_accelerate_or_default(config)

+    if isinstance(config, VLMTransformersModelConfig):
+        return load_model_with_accelerate_or_default(config)
+
     if isinstance(config, DummyModelConfig):
         return load_dummy_model(config)
@@ -186,7 +178,9 @@ def load_model_with_inference_endpoints(config: Union[InferenceEndpointModelConf


 def load_model_with_accelerate_or_default(
-    config: Union[AdapterModelConfig, TransformersModelConfig, DeltaModelConfig],
+    config: Union[
+        AdapterModelConfig, TransformersModelConfig, DeltaModelConfig, VLLMModelConfig, VLMTransformersModelConfig
+    ],
 ):
     if isinstance(config, AdapterModelConfig):
         model = AdapterModel(config=config)
@@ -197,6 +191,9 @@ def load_model_with_accelerate_or_default(
             raise ImportError(NO_VLLM_ERROR_MSG)
         model = VLLMModel(config=config)
         return model
+    elif isinstance(config, VLMTransformersModelConfig):
+        model = VLMTransformersModel(config=config)
+        return model
     else:
         model = TransformersModel(config=config)
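
Net effect of the loader changes: load_model is now typed against the ModelConfig base class and the LightevalModel return type, and a VLMTransformersModelConfig is routed through the same accelerate-or-default path as text models, yielding a VLMTransformersModel. A minimal sketch of the new entry point; only load_model and the config/model class names come from this diff, the field values are assumptions taken from the example YAML.

# Sketch: load_model dispatches on the config's concrete type.
from lighteval.models.model_loader import load_model
from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig

config = VLMTransformersModelConfig(
    model_name="Qwen/Qwen2.5-VL-3B-Instruct",
    revision="main",
    dtype="float16",
)
model = load_model(config)  # -> VLMTransformersModel, a LightevalModel subclass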