Skip to content

Commit 19d2d7f

Browse files
committed
fix test bug
1 parent 9a40f63 commit 19d2d7f

File tree

2 files changed

+5
-6
lines changed

2 files changed

+5
-6
lines changed

src/eval_mm/metrics/llm_as_a_judge_scorer.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,8 @@ def aggregate(scores: list, **kwargs) -> float:
 98  98
 99  99
100 100    def test_llm_as_a_judge_scorer():
101     -      from eval_mm.utils.azure_client import OpenAIChatAPI
102     -
103     -      client = OpenAIChatAPI()
    101 +      from eval_mm.utils.azure_client import MochChatAPI
    102 +      client = MochChatAPI()
104 103        questions = ["What is the capital of Japan?", "What is the capital of France?"]
105 104        answers = ["Tokyo", "Paris"]
106 105        preds = ["Tokyo", "Paris"]
@@ -114,6 +113,6 @@ def test_llm_as_a_judge_scorer():
114 113            judge_model=model_name,
115 114            batch_size=batch_size,
116 115        )
117     -      assert scores == [5, 5]
    116 +      assert scores == [1, 1]
118 117        scores = LlmAsaJudgeScorer.aggregate(scores)
119     -      assert scores == 5.0
    118 +      assert scores == 1.0

src/eval_mm/tasks/jic_vqa.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def doc_to_text(doc) -> str:
114 114
115 115        @staticmethod
116 116        def doc_to_visual(doc) -> list[Image.Image]:
117     -          return doc["image"]
    117 +          return [doc["image"]]
118 118
119 119        @staticmethod
120 120        def doc_to_id(doc) -> int:

0 commit comments

Comments (0)