1+ from unittest .mock import MagicMock , patch
2+
13import pytest
24
3- from guidellm .presentation .data_models import Bucket
5+ from guidellm .dataset .file import FileDatasetCreator
6+ from guidellm .dataset .hf_datasets import HFDatasetsCreator
7+ from guidellm .dataset .in_memory import InMemoryDatasetCreator
8+ from guidellm .dataset .synthetic import SyntheticDatasetCreator
9+ from guidellm .presentation .data_models import Bucket , Dataset
10+ from tests .unit .mock_benchmark import mock_generative_benchmark
411
512
613@pytest .mark .smoke
@@ -18,3 +25,114 @@ def test_bucket_from_data():
1825 assert buckets [1 ].value == 8.0
1926 assert buckets [1 ].count == 5
2027 assert bucket_width == 1
28+
29+
def mock_processor(cls):
    """Return the processor of a freshly built mock benchmark's request loader.

    ``cls`` is accepted but never used. NOTE(review): presumably this lets the
    function stand in for a classmethod when patched onto a class; no caller
    is visible in this part of the file -- confirm it is still needed.
    """
    return mock_generative_benchmark().request_loader.processor
32+
33+
def new_handle_create(cls, *args, **kwargs) -> MagicMock:
    """Stub for a dataset creator's ``handle_create``.

    Ignores every argument and returns a new ``MagicMock`` on each call, so
    tests that patch this in never run the real ``handle_create``.
    """
    return MagicMock()
36+
37+
def new_extract_dataset_name(cls, *args, **kwargs) -> str:
    """Stub for ``extract_dataset_name`` that always reports a fixed name.

    All arguments are ignored; the return value is the constant string the
    tests compare ``Dataset.name`` against.
    """
    return "data:prideandprejudice.txt.gz"
40+
41+
@pytest.mark.smoke
def test_dataset_from_data_uses_extracted_dataset_name():
    """``Dataset.from_data`` should report the name given by the creator.

    ``SyntheticDatasetCreator.handle_create`` is replaced with a stub that
    returns a ``MagicMock`` and ``extract_dataset_name`` with a stub that
    returns a fixed name; the resulting ``Dataset.name`` must equal that name.
    """
    # NOTE(review): the stubbed name is the same string that
    # test_dataset_from_data_with_synthetic_dataset expects when the extractor
    # returns None, so this assertion may also pass if the extracted name were
    # ignored -- consider stubbing a distinct value here.
    mock_benchmark = mock_generative_benchmark()
    with (
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
55+
56+
def new_is_supported(cls, *args, **kwargs) -> bool:
    """Stub for a creator's ``is_supported`` that accepts any input.

    Ignores every argument and always returns ``True``.
    """
    return True
59+
60+
@pytest.mark.smoke
def test_dataset_from_data_with_in_memory_dataset():
    """With the in-memory creator forced to accept the data, expect "In-memory".

    Only ``InMemoryDatasetCreator.is_supported`` is patched (to always return
    ``True``); the test then checks that ``Dataset.from_data`` names the
    dataset "In-memory".
    """
    mock_benchmark = mock_generative_benchmark()
    with patch.object(InMemoryDatasetCreator, "is_supported", new=new_is_supported):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "In-memory"
67+
68+
def hardcoded_isnt_supported(cls, *args, **kwargs) -> bool:
    """Stub for a creator's ``is_supported`` that rejects any input.

    Ignores every argument and always returns ``False``.
    """
    return False
71+
72+
def new_extract_dataset_name_none(cls, *args, **kwargs) -> None:
    """Stub for ``extract_dataset_name`` that reports no name.

    Ignores every argument and always returns ``None``; the tests that patch
    it in then assert on the name ``Dataset.from_data`` produces instead.
    """
    return None
75+
76+
@pytest.mark.smoke
def test_dataset_from_data_with_synthetic_dataset():
    """Synthetic creator selected and no extracted name: expect the data name.

    The in-memory creator is forced to reject the data and the synthetic
    creator to accept it. ``handle_create`` returns a ``MagicMock`` and
    ``extract_dataset_name`` returns ``None``, so the asserted name has to
    come from somewhere other than the extractor.
    """
    mock_benchmark = mock_generative_benchmark()
    with (
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(SyntheticDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        # presumably the mock request loader's own data string -- verify
        # against tests.unit.mock_benchmark
        assert dataset.name == "data:prideandprejudice.txt.gz"
94+
95+
@pytest.mark.smoke
def test_dataset_from_data_with_file_dataset():
    """File creator selected and no extracted name: expect the data string.

    The request loader's ``data`` is set to "dataset.yaml". The in-memory and
    synthetic creators are forced to reject it and the file creator to accept
    it; ``handle_create`` returns a ``MagicMock`` and ``extract_dataset_name``
    returns ``None``. The dataset name must then equal "dataset.yaml".
    """
    mock_benchmark = mock_generative_benchmark()
    mock_benchmark.request_loader.data = "dataset.yaml"
    with (
        patch.object(FileDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(
            FileDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "dataset.yaml"
117+
118+
@pytest.mark.smoke
def test_dataset_from_data_with_hf_dataset():
    """HF creator selected and no extracted name: expect the data string.

    The request loader's ``data`` is set to "openai/gsm8k". The in-memory,
    synthetic and file creators are forced to reject it and the HF datasets
    creator to accept it; ``handle_create`` returns a ``MagicMock`` and
    ``extract_dataset_name`` returns ``None``. The dataset name must then
    equal "openai/gsm8k".
    """
    mock_benchmark = mock_generative_benchmark()
    mock_benchmark.request_loader.data = "openai/gsm8k"
    with (
        patch.object(HFDatasetsCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported),
        patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported),
        patch.object(
            HFDatasetsCreator, "extract_dataset_name", new=new_extract_dataset_name_none
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "openai/gsm8k"
0 commit comments