1- import pytest
21from unittest .mock import MagicMock , patch
32
3+ import pytest
4+
45from guidellm .dataset .file import FileDatasetCreator
56from guidellm .dataset .hf_datasets import HFDatasetsCreator
67from guidellm .dataset .in_memory import InMemoryDatasetCreator
@@ -25,79 +26,113 @@ def test_bucket_from_data():
2526 assert buckets [1 ].count == 5
2627 assert bucket_width == 1
2728
29+
def mock_processor(cls):
    """Return the processor of a freshly built mock benchmark's request loader.

    ``cls`` is accepted so the function can stand in for a method when used as
    a patch replacement; it is not used.
    """
    benchmark = mock_generative_benchmark()
    return benchmark.request_loader.processor
3032
33+
def new_handle_create(cls, *args, **kwargs):
    """Patch replacement for ``handle_create``: ignore all input, return a mock.

    Every call produces a new ``MagicMock`` instance.
    """
    stand_in = MagicMock()
    return stand_in
3336
37+
def new_extract_dataset_name(cls, *args, **kwargs):
    """Patch replacement for ``extract_dataset_name`` with a fixed result.

    All arguments are ignored; the same dataset name is returned every time.
    """
    fixed_name = "data:prideandprejudice.txt.gz"
    return fixed_name
3640
41+
@pytest.mark.smoke
def test_dataset_from_data_uses_extracted_dataset_name():
    """Check that ``Dataset.from_data`` reports the extracted dataset name.

    ``SyntheticDatasetCreator.handle_create`` is replaced by a stub returning a
    mock, and ``extract_dataset_name`` by a stub returning a fixed name; the
    resulting dataset's ``name`` must equal that fixed name.
    """
    mock_benchmark = mock_generative_benchmark()

    # Build the patchers up front; they only take effect inside the ``with``.
    create_patch = patch.object(
        SyntheticDatasetCreator, "handle_create", new=new_handle_create
    )
    name_patch = patch.object(
        SyntheticDatasetCreator,
        "extract_dataset_name",
        new=new_extract_dataset_name,
    )

    with create_patch, name_patch:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
55+
4756
def new_is_supported(cls, *args, **kwargs):
    """Patch replacement for ``is_supported`` that always answers ``True``.

    All arguments are ignored.
    """
    supported = True
    return supported
5059
60+
@pytest.mark.smoke
def test_dataset_from_data_with_in_memory_dataset():
    """Check that an in-memory dataset is reported with the name "In-memory".

    ``InMemoryDatasetCreator.is_supported`` is patched to always return
    ``True`` before ``Dataset.from_data`` is called on the mock request loader.
    """
    mock_benchmark = mock_generative_benchmark()
    # The attribute name must match exactly: a stray leading space would make
    # ``patch.object`` raise AttributeError instead of patching the method.
    with patch.object(InMemoryDatasetCreator, "is_supported", new=new_is_supported):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "In-memory"
5767
68+
def hardcoded_isnt_supported(cls, *args, **kwargs):
    """Patch replacement for ``is_supported`` that always answers ``False``.

    All arguments are ignored.
    """
    supported = False
    return supported
6071
72+
def new_extract_dataset_name_none(cls, *args, **kwargs):
    """Patch replacement for ``extract_dataset_name`` that yields no name.

    All arguments are ignored and ``None`` is returned explicitly.
    """
    no_name = None
    return no_name
6375
76+
@pytest.mark.smoke
def test_dataset_from_data_with_synthetic_dataset():
    """Check the dataset name when only the synthetic creator is supported.

    The in-memory creator is patched to report unsupported, the synthetic
    creator to report supported, ``handle_create`` to return a mock, and
    ``extract_dataset_name`` to return ``None``. The resulting dataset's
    ``name`` is expected to be "data:prideandprejudice.txt.gz".
    """
    mock_benchmark = mock_generative_benchmark()

    # Patchers are listed in the order they are entered.
    create_patch = patch.object(
        SyntheticDatasetCreator, "handle_create", new=new_handle_create
    )
    in_memory_off = patch.object(
        InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    synthetic_on = patch.object(
        SyntheticDatasetCreator, "is_supported", new=new_is_supported
    )
    no_name_patch = patch.object(
        SyntheticDatasetCreator,
        "extract_dataset_name",
        new=new_extract_dataset_name_none,
    )

    with create_patch, in_memory_off, synthetic_on, no_name_patch:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
7594
95+
@pytest.mark.smoke
def test_dataset_from_data_with_file_dataset():
    """Check the dataset name when only the file creator is supported.

    The request loader's ``data`` is set to a file name, the in-memory and
    synthetic creators are patched to report unsupported, and the file creator
    is patched to report supported, return a mock from ``handle_create`` and
    return ``None`` from ``extract_dataset_name``. The resulting dataset's
    ``name`` is expected to equal the file name.
    """
    mock_benchmark = mock_generative_benchmark()
    # No leading whitespace: the value must match the name asserted below.
    mock_benchmark.request_loader.data = "dataset.yaml"
    with (
        patch.object(FileDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(
            FileDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "dataset.yaml"
89117
118+
@pytest.mark.smoke
def test_dataset_from_data_with_hf_dataset():
    """Check the dataset name when only the HF datasets creator is supported.

    The request loader's ``data`` is set to a dataset identifier, the
    in-memory, synthetic and file creators are patched to report unsupported,
    and the HF creator is patched to report supported, return a mock from
    ``handle_create`` and return ``None`` from ``extract_dataset_name``. The
    resulting dataset's ``name`` is expected to equal the identifier.
    """
    mock_benchmark = mock_generative_benchmark()
    # No leading whitespace: the value must match the name asserted below.
    mock_benchmark.request_loader.data = "openai/gsm8k"
    with (
        patch.object(HFDatasetsCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported),
        patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported),
        patch.object(
            HFDatasetsCreator, "extract_dataset_name", new=new_extract_dataset_name_none
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "openai/gsm8k"
0 commit comments