Skip to content

Commit a6d115d

Browse files
DaltheCow, parfeniukink, Dmytro Parfeniuk, and markurtz
authored
Multiple request rates (#2)
Co-authored-by: Dmytro Parfeniuk <[email protected]>
Co-authored-by: Dmytro Parfeniuk <[email protected]>
Co-authored-by: Mark Kurtz <[email protected]>
1 parent 051df6b commit a6d115d

File tree

9 files changed

+327
-49
lines changed

9 files changed

+327
-49
lines changed

src/guidellm/executor/__init__.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,21 @@
11
from .executor import Executor
22
from .profile_generator import (
3+
FixedRateProfileGenerator,
34
Profile,
45
ProfileGenerationMode,
56
ProfileGenerator,
6-
SingleProfileGenerator,
77
SweepProfileGenerator,
8+
rate_type_to_load_gen_mode,
9+
rate_type_to_profile_mode,
810
)
911

1012
__all__ = [
13+
"rate_type_to_load_gen_mode",
14+
"rate_type_to_profile_mode",
1115
"Executor",
1216
"ProfileGenerationMode",
1317
"Profile",
1418
"ProfileGenerator",
15-
"SingleProfileGenerator",
19+
"FixedRateProfileGenerator",
1620
"SweepProfileGenerator",
1721
]

src/guidellm/executor/executor.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ def __init__(
2020
self,
2121
backend: Backend,
2222
request_generator: RequestGenerator,
23-
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE,
23+
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SWEEP,
2424
profile_args: Optional[Dict[str, Any]] = None,
2525
max_requests: Optional[int] = None,
2626
max_duration: Optional[float] = None,

src/guidellm/executor/profile_generator.py

+48-24
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from abc import ABC, abstractmethod
22
from dataclasses import dataclass
33
from enum import Enum
4-
from typing import Dict, Optional, Type, Union
4+
from typing import Dict, List, Optional, Type, Union
55

66
import numpy
77

@@ -12,16 +12,30 @@
1212
"ProfileGenerationMode",
1313
"Profile",
1414
"ProfileGenerator",
15-
"SingleProfileGenerator",
15+
"FixedRateProfileGenerator",
1616
"SweepProfileGenerator",
1717
]
1818

19+
rate_type_to_load_gen_mode = {
20+
"synchronous": LoadGenerationMode.SYNCHRONOUS,
21+
"constant": LoadGenerationMode.CONSTANT,
22+
"poisson": LoadGenerationMode.POISSON,
23+
}
24+
1925

2026
class ProfileGenerationMode(Enum):
21-
SINGLE = "single"
27+
FIXED_RATE = "fixed_rate"
2228
SWEEP = "sweep"
2329

2430

31+
rate_type_to_profile_mode = {
32+
"synchronous": ProfileGenerationMode.FIXED_RATE,
33+
"constant": ProfileGenerationMode.FIXED_RATE,
34+
"poisson": ProfileGenerationMode.FIXED_RATE,
35+
"sweep": ProfileGenerationMode.SWEEP,
36+
}
37+
38+
2539
@dataclass
2640
class Profile:
2741
load_gen_mode: LoadGenerationMode
@@ -55,34 +69,44 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil
5569
pass
5670

5771

58-
@ProfileGenerator.register(ProfileGenerationMode.SINGLE)
59-
class SingleProfileGenerator(ProfileGenerator):
60-
def __init__(self, rate: float, rate_type: LoadGenerationMode):
61-
super().__init__(ProfileGenerationMode.SINGLE)
62-
self._rate: float = rate
63-
self._rate_type: LoadGenerationMode = rate_type
72+
@ProfileGenerator.register(ProfileGenerationMode.FIXED_RATE)
73+
class FixedRateProfileGenerator(ProfileGenerator):
74+
def __init__(
75+
self,
76+
load_gen_mode: Optional[LoadGenerationMode],
77+
rates: Optional[List[float]] = None,
78+
**kwargs,
79+
):
80+
super().__init__(ProfileGenerationMode.FIXED_RATE)
81+
if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0:
82+
raise ValueError("custom rates are not supported in synchronous mode")
83+
self._rates: Optional[List[float]] = rates
84+
self._load_gen_mode = load_gen_mode
6485
self._generated: bool = False
86+
self._rate_index: int = 0
6587

6688
def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]:
67-
if self._generated:
68-
return None
69-
70-
self._generated = True
71-
72-
if self._rate_type == LoadGenerationMode.CONSTANT:
73-
return Profile(
74-
load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=self._rate
75-
)
76-
elif self._rate_type == LoadGenerationMode.SYNCHRONOUS:
89+
if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS:
90+
if self._generated:
91+
return None
92+
self._generated = True
7793
return Profile(
7894
load_gen_mode=LoadGenerationMode.SYNCHRONOUS, load_gen_rate=None
7995
)
80-
elif self._rate_type == LoadGenerationMode.POISSON:
81-
return Profile(
82-
load_gen_mode=LoadGenerationMode.POISSON, load_gen_rate=self._rate
83-
)
96+
elif self._load_gen_mode in {
97+
LoadGenerationMode.CONSTANT,
98+
LoadGenerationMode.POISSON,
99+
}:
100+
if self._rates:
101+
if self._rate_index >= len(self._rates):
102+
return None
103+
current_rate = self._rates[self._rate_index]
104+
self._rate_index += 1
105+
return Profile(
106+
load_gen_mode=self._load_gen_mode, load_gen_rate=current_rate
107+
)
84108

85-
raise ValueError(f"Invalid rate type: {self._rate_type}")
109+
raise ValueError(f"Invalid rate type: {self._load_gen_mode}")
86110

87111

88112
@ProfileGenerator.register(ProfileGenerationMode.SWEEP)

src/guidellm/main.py

+13-4
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,11 @@
22

33
from guidellm.backend import Backend
44
from guidellm.core import TextGenerationBenchmarkReport
5-
from guidellm.executor import Executor
5+
from guidellm.executor import (
6+
Executor,
7+
rate_type_to_load_gen_mode,
8+
rate_type_to_profile_mode,
9+
)
610
from guidellm.request import (
711
EmulatedRequestGenerator,
812
FileRequestGenerator,
@@ -45,8 +49,9 @@
4549
@click.option(
4650
"--rate",
4751
type=float,
48-
default="1.0",
52+
default=[1.0],
4953
help="Rate to use for constant and poisson rate types",
54+
multiple=True,
5055
)
5156
@click.option(
5257
"--num-seconds",
@@ -106,12 +111,16 @@ def main(
106111
else:
107112
raise ValueError(f"Unknown data type: {data_type}")
108113

114+
profile_mode = rate_type_to_profile_mode.get(rate_type)
115+
load_gen_mode = rate_type_to_load_gen_mode.get(rate_type, None)
116+
if not profile_mode or not load_gen_mode:
117+
raise ValueError("Invalid rate type")
109118
# Create executor
110119
executor = Executor(
111120
request_generator=request_generator,
112121
backend=backend,
113-
profile_mode=rate_type,
114-
profile_args={"rate_type": rate_type, "rate": rate},
122+
profile_mode=profile_mode,
123+
profile_args={"load_gen_mode": load_gen_mode, "rates": rate},
115124
max_requests=num_requests,
116125
max_duration=num_seconds,
117126
)

src/guidellm/scheduler/load_generator.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ class LoadGenerationMode(str, Enum):
1616
1717
"""
1818

19-
SYNCHRONOUS = "sync"
19+
SYNCHRONOUS = "synchronous"
2020
CONSTANT = "constant"
2121
POISSON = "poisson"
2222

tests/integration/executor/test_report_generation.py

+14-15
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,9 @@ def test_executor_openai_single_report_generation_sync_mode(
1616
request_genrator = dummy.services.TestRequestGenerator(
1717
tokenizer="bert-base-uncased"
1818
)
19-
profile_generation_mode = ProfileGenerationMode.SINGLE
19+
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
2020
profile_generator_kwargs = {
21-
"rate_type": LoadGenerationMode.SYNCHRONOUS,
22-
"rate": 1.0,
21+
"load_gen_mode": LoadGenerationMode.SYNCHRONOUS,
2322
}
2423

2524
executor = Executor(
@@ -55,10 +54,10 @@ def test_executor_openai_single_report_generation_constant_mode_infinite(
5554
request_genrator = dummy.services.TestRequestGenerator(
5655
tokenizer="bert-base-uncased"
5756
)
58-
profile_generation_mode = ProfileGenerationMode.SINGLE
57+
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
5958
profile_generator_kwargs = {
60-
"rate_type": LoadGenerationMode.CONSTANT,
61-
"rate": 1.0,
59+
"load_gen_mode": LoadGenerationMode.CONSTANT,
60+
"rates": [1.0],
6261
}
6362

6463
executor = Executor(
@@ -88,10 +87,10 @@ def test_executor_openai_single_report_generation_constant_mode_limited(
8887
request_genrator = dummy.services.TestRequestGenerator(
8988
tokenizer="bert-base-uncased"
9089
)
91-
profile_generation_mode = ProfileGenerationMode.SINGLE
90+
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
9291
profile_generator_kwargs = {
93-
"rate_type": LoadGenerationMode.CONSTANT,
94-
"rate": 1.0,
92+
"load_gen_mode": LoadGenerationMode.CONSTANT,
93+
"rates": [1.0],
9594
}
9695

9796
executor = Executor(
@@ -124,10 +123,10 @@ def test_executor_openai_single_report_generation_constant_mode_failed(
124123
request_genrator = dummy.services.TestRequestGenerator(
125124
tokenizer="bert-base-uncased"
126125
)
127-
profile_generation_mode = ProfileGenerationMode.SINGLE
126+
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
128127
profile_generator_kwargs = {
129-
"rate_type": LoadGenerationMode.CONSTANT,
130-
"rate": 1.0,
128+
"load_gen_mode": LoadGenerationMode.CONSTANT,
129+
"rates": [1.0],
131130
}
132131

133132
executor = Executor(
@@ -153,10 +152,10 @@ def test_executor_openai_single_report_generation_constant_mode_cancelled_report
153152
request_genrator = dummy.services.TestRequestGenerator(
154153
tokenizer="bert-base-uncased"
155154
)
156-
profile_generation_mode = ProfileGenerationMode.SINGLE
155+
profile_generation_mode = ProfileGenerationMode.FIXED_RATE
157156
profile_generator_kwargs = {
158-
"rate_type": LoadGenerationMode.CONSTANT,
159-
"rate": 1.0,
157+
"load_gen_mode": LoadGenerationMode.CONSTANT,
158+
"rates": [1.0],
160159
}
161160

162161
executor = Executor(

tests/unit/executor/test_executor.py

+87
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,87 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import pytest
4+
5+
from guidellm.backend.base import Backend
6+
from guidellm.executor import Executor, Profile, ProfileGenerator
7+
from guidellm.executor.profile_generator import ProfileGenerationMode
8+
from guidellm.request.base import RequestGenerator
9+
from guidellm.scheduler import LoadGenerationMode
10+
11+
12+
def test_executor_creation():
13+
mock_request_generator = MagicMock(spec=RequestGenerator)
14+
mock_backend = MagicMock(spec=Backend)
15+
profile_mode = ProfileGenerationMode.SWEEP
16+
profile_args = None
17+
max_requests = None
18+
max_duration = None
19+
executor = Executor(
20+
mock_backend,
21+
mock_request_generator,
22+
profile_mode,
23+
profile_args,
24+
max_requests,
25+
max_duration,
26+
)
27+
assert executor.request_generator == mock_request_generator
28+
assert executor.backend == mock_backend
29+
assert executor.max_requests == max_requests
30+
assert executor.max_duration == max_duration
31+
32+
33+
@pytest.fixture
34+
def mock_request_generator():
35+
return MagicMock(spec=RequestGenerator)
36+
37+
38+
@pytest.fixture
39+
def mock_backend():
40+
return MagicMock(spec=Backend)
41+
42+
43+
@pytest.fixture
44+
def mock_scheduler():
45+
with patch("guidellm.executor.executor.Scheduler") as MockScheduler:
46+
yield MockScheduler
47+
48+
49+
def test_executor_run(mock_request_generator, mock_backend, mock_scheduler):
50+
mock_profile_generator = MagicMock(spec=ProfileGenerator)
51+
profiles = [
52+
Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=1.0),
53+
Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=2.0),
54+
None,
55+
]
56+
mock_profile_generator.next.side_effect = profiles
57+
58+
with patch(
59+
"guidellm.executor.executor.ProfileGenerator.create",
60+
return_value=mock_profile_generator,
61+
):
62+
executor = Executor(
63+
request_generator=mock_request_generator,
64+
backend=mock_backend,
65+
profile_mode=ProfileGenerationMode.FIXED_RATE,
66+
profile_args={
67+
"load_gen_mode": LoadGenerationMode.CONSTANT,
68+
"rates": [1.0, 2.0],
69+
},
70+
max_requests=10,
71+
max_duration=100,
72+
)
73+
74+
mock_benchmark = MagicMock()
75+
mock_scheduler.return_value.run.return_value = mock_benchmark
76+
77+
report = executor.run()
78+
79+
assert mock_scheduler.call_count == 2
80+
assert len(report.benchmarks) == 2
81+
assert report.benchmarks[0] == mock_benchmark
82+
assert report.benchmarks[1] == mock_benchmark
83+
calls = mock_scheduler.call_args_list
84+
assert calls[0][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT
85+
assert calls[0][1]["load_gen_rate"] == 1.0
86+
assert calls[1][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT
87+
assert calls[1][1]["load_gen_rate"] == 2.0

tests/unit/executor/test_single_profile_generation_mode.py renamed to tests/unit/executor/test_fixed_rate_profile_generation_mode.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from typing import List, Optional
2+
13
import pytest
24

35
from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
@@ -24,12 +26,15 @@ def test_executor_single_profile_generator_benchmark_report(
2426
request_genrator = dummy.services.TestRequestGenerator(
2527
tokenizer="bert-base-uncased"
2628
)
27-
profile_generator_kwargs = {"rate_type": load_gen_mode, "rate": 1.0}
29+
rates: Optional[List[float]] = [1.0]
30+
if load_gen_mode == LoadGenerationMode.SYNCHRONOUS:
31+
rates = None
32+
profile_generator_kwargs = {"load_gen_mode": load_gen_mode, "rates": rates}
2833

2934
executor = Executor(
3035
backend=openai_backend_factory(),
3136
request_generator=request_genrator,
32-
profile_mode=ProfileGenerationMode.SINGLE,
37+
profile_mode=ProfileGenerationMode.FIXED_RATE,
3338
profile_args=profile_generator_kwargs,
3439
max_requests=1,
3540
max_duration=None,

0 commit comments

Comments
 (0)