|
17 | 17 | # Adapted from vllm-project/vllm/blob/main/tests/models/utils.py |
18 | 18 | # |
19 | 19 |
|
20 | | -from difflib import SequenceMatcher |
21 | 20 | from typing import Dict, List, Optional, Sequence, Tuple, Union |
22 | 21 |
|
23 | 22 | from vllm.logprobs import PromptLogprobs, SampleLogprobs |
@@ -53,36 +52,6 @@ def check_outputs_equal( |
53 | 52 | assert output_ids_0 == output_ids_1, fail_msg |
54 | 53 |
|
55 | 54 |
|
56 | | -def check_outputs_similar( |
57 | | - *, |
58 | | - outputs_0_lst: Sequence[TokensText], |
59 | | - outputs_1_lst: Sequence[TokensText], |
60 | | - name_0: str, |
61 | | - name_1: str, |
62 | | - similarity_threshold: float = 0.75, |
63 | | -): |
64 | | - """ |
65 | | - Compare two sequences allowing for minor differences due to numerical precision. |
66 | | - Uses text similarity ratio instead of exact matching. |
67 | | - """ |
68 | | - assert len(outputs_0_lst) == len(outputs_1_lst) |
69 | | - |
70 | | - for prompt_idx, (outputs_0, |
71 | | - outputs_1) in enumerate(zip(outputs_0_lst, |
72 | | - outputs_1_lst)): |
73 | | - output_ids_0, output_str_0 = outputs_0 |
74 | | - output_ids_1, output_str_1 = outputs_1 |
75 | | - |
76 | | - similarity = SequenceMatcher(None, output_str_0, output_str_1).ratio() |
77 | | - |
78 | | - fail_msg = ( |
79 | | - f"Test{prompt_idx}: Similarity {similarity:.2%} < {similarity_threshold:.0%}\n" |
80 | | - f"{name_0}:\t{output_str_0!r}\n" |
81 | | - f"{name_1}:\t{output_str_1!r}") |
82 | | - |
83 | | - assert similarity >= similarity_threshold, fail_msg |
84 | | - |
85 | | - |
86 | 55 | # Representation of generated sequence as a tuple of |
87 | 56 | # * Token ID list |
88 | 57 | # * String |
|
0 commit comments