vllm-project · albertoperdomo2 · Oct 21, 2025 · sjmonson · Oct 28, 2025 · sjmonson
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -162,9 +162,28 @@ def benchmark():
     help=(
         "Benchmark rate(s) to test. Meaning depends on profile: "
         "sweep=number of benchmarks, concurrent=concurrent requests, "
-        "async/constant/poisson=requests per second."
+        "async/constant/poisson=requests per second. "
+        "Not used for incremental profile."
     ),
 )
+@click.option(
+    "--start-rate",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("start_rate"),
+    help="Initial rate for incremental profile in requests per second.",
+)
+@click.option(
+    "--increment-factor",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("increment_factor"),
+    help="Factor by which to increase rate over time for incremental profile.",
+)
+@click.option(
+    "--rate-limit",
+    type=int,
+    default=BenchmarkGenerativeTextArgs.get_default("rate_limit"),
+    help="Maximum rate cap for incremental profile.",
+)
 # Backend configuration
 @click.option(
     "--backend",

diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py
@@ -21,6 +21,7 @@
 from .profile import (
     AsyncProfile,
     ConcurrentProfile,
+    IncrementalProfile,
     Profile,
     ProfileType,
     SweepProfile,
@@ -69,6 +70,7 @@
     "GenerativeMetrics",
     "GenerativeMetricsSummary",
     "GenerativeVideoMetricsSummary",
+    "IncrementalProfile",
     "Profile",
     "ProfileType",
     "SchedulerDict",

diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py
@@ -27,6 +27,7 @@
 from guidellm import settings
 from guidellm.scheduler import (
     AsyncConstantStrategy,
+    AsyncIncrementalStrategy,
     AsyncPoissonStrategy,
     ConcurrentStrategy,
     Constraint,
@@ -45,14 +46,17 @@
 __all__ = [
     "AsyncProfile",
     "ConcurrentProfile",
+    "IncrementalProfile",
     "Profile",
     "ProfileType",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
 ]
 
-ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"]
+ProfileType = Literal[
+    "synchronous", "concurrent", "throughput", "async", "sweep", "incremental"
+]
 
 
 class Profile(
@@ -707,3 +711,120 @@ def next_strategy(
             )
         else:
             raise ValueError(f"Invalid strategy type: {self.strategy_type}")
+
+
+@Profile.register("incremental")
+class IncrementalProfile(ThroughputProfile):
+    """
+    Profile that runs one benchmark under a steadily increasing request rate.
+
+    Starts at ``start_rate`` and raises the rate by ``increment_factor`` over
+    time until the optional ``rate_limit`` is reached.
+    """
+
+    type_: Literal["incremental"] = "incremental"  # type: ignore[assignment]
+    start_rate: PositiveFloat = Field(
+        description="Initial rate at which to schedule requests in requests per second",
+    )
+    increment_factor: PositiveFloat = Field(
+        description="Factor by which to increase the rate over time",
+    )
+    rate_limit: PositiveInt | None = Field(
+        default=None,
+        description="Maximum rate cap after which load remains constant",
+    )
+    initial_burst: bool = Field(
+        default=True,
+        description=(
+            "Whether to send initial burst of math.floor(start_rate) requests "
+            "to reach target rate"
+        ),
+    )
+
+    @classmethod
+    def resolve_args(
+        cls,
+        rate_type: str,
+        rate: list[float] | None,
+        random_seed: int,
+        start_rate: float | None = None,
+        increment_factor: float | None = None,
+        rate_limit: int | None = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """
+        Validate incremental-profile inputs and fold them into ``kwargs``.
+
+        :param rate_type: Profile type identifier; must be "incremental"
+        :param rate: Generic rate argument; must be None for this profile
+        :param random_seed: Random seed (accepted but not used)
+        :param start_rate: Initial rate in requests per second (required, > 0)
+        :param increment_factor: Rate increase factor over time (required, > 0)
+        :param rate_limit: Optional maximum rate cap (> 0 when given)
+        :param kwargs: Additional arguments passed through unchanged
+        :return: ``kwargs`` extended with start_rate, increment_factor and,
+            when provided, rate_limit
+        :raises ValueError: If rate_type or rate is invalid, a required value
+            is missing, or a numeric value is not positive
+        """
+        del random_seed  # accepted for signature compatibility only
+        if rate_type != "incremental":
+            raise ValueError("Rate type must be 'incremental' for incremental profile")
+        if rate is not None:
+            raise ValueError(
+                "rate does not apply to incremental profile, it must be set to None "
+                "or not set at all. Use start_rate and increment_factor instead."
+            )
+
+        # Presence is checked for both values before either sign check so the
+        # error raised for any given combination of inputs stays the same
+        rates: dict[str, float | None] = {
+            "start_rate": start_rate,
+            "increment_factor": increment_factor,
+        }
+        for name, value in rates.items():
+            if value is None:
+                raise ValueError(f"{name} is required for incremental profile")
+        for name, value in rates.items():
+            if value is not None and value <= 0:
+                raise ValueError(f"{name} must be a positive number")
+        if rate_limit is not None and rate_limit <= 0:
+            raise ValueError("rate_limit must be a positive integer")
+
+        kwargs.update(rates)
+        if rate_limit is not None:
+            kwargs["rate_limit"] = rate_limit
+
+        return kwargs
+
+    @property
+    def strategy_types(self) -> list[StrategyType]:
+        """
+        :return: One-element list holding this profile's strategy type
+        """
+        return [self.type_]
+
+    def next_strategy(
+        self,
+        prev_strategy: SchedulingStrategy | None,
+        prev_benchmark: Benchmark | None,
+    ) -> AsyncIncrementalStrategy | None:
+        """
+        Produce the single incremental strategy, then signal completion.
+
+        :param prev_strategy: Previously completed strategy (unused)
+        :param prev_benchmark: Benchmark results from previous execution (unused)
+        :return: AsyncIncrementalStrategy for the first run, None afterward
+        """
+        del prev_strategy, prev_benchmark  # unused
+        if len(self.completed_strategies) < 1:
+            return AsyncIncrementalStrategy(
+                start_rate=self.start_rate,
+                increment_factor=self.increment_factor,
+                rate_limit=self.rate_limit,
+                initial_burst=self.initial_burst,
+                max_concurrency=self.max_concurrency,
+                startup_duration=self.startup_duration,
+            )
+
+        return None
diff --git a/src/guidellm/benchmark/schemas.py b/src/guidellm/benchmark/schemas.py
@@ -1840,6 +1840,18 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
     rate: float | list[float] | None = Field(
         default=None, description="Request rate(s) for rate-based scheduling"
     )
+    start_rate: float | None = Field(
+        default=None,
+        description="Initial rate for incremental profile in requests per second",
+    )
+    increment_factor: float | None = Field(
+        default=None,
+        description="Factor by which to increase rate over time for incremental profile",
+    )
+    rate_limit: int | None = Field(
+        default=None,
+        description="Maximum rate cap for incremental profile",
+    )
     # Backend configuration
     backend: BackendType | Backend = Field(
         default="openai_http", description="Backend type or instance for execution"

diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py
@@ -38,6 +38,7 @@
 )
 from .strategies import (
     AsyncConstantStrategy,
+    AsyncIncrementalStrategy,
     AsyncPoissonStrategy,
     ConcurrentStrategy,
     SchedulingStrategy,
@@ -51,6 +52,7 @@
 
 __all__ = [
     "AsyncConstantStrategy",
+    "AsyncIncrementalStrategy",
     "AsyncPoissonStrategy",
     "BackendInterface",
     "BackendT",

diff --git a/src/guidellm/scheduler/strategies.py b/src/guidellm/scheduler/strategies.py
@@ -12,6 +12,7 @@
 from __future__ import annotations
 
 import asyncio
+import math
 import random
 import time
 from abc import abstractmethod
@@ -25,6 +26,7 @@
 
 __all__ = [
     "AsyncConstantStrategy",
+    "AsyncIncrementalStrategy",
     "AsyncPoissonStrategy",
     "ConcurrentStrategy",
     "SchedulingStrategy",
@@ -36,7 +38,9 @@
 
 
 StrategyType = Annotated[
-    Literal["synchronous", "concurrent", "throughput", "constant", "poisson"],
+    Literal[
+        "synchronous", "concurrent", "throughput", "constant", "poisson", "incremental"
+    ],
     "Valid strategy type identifiers for scheduling request patterns",
 ]
 
@@ -517,3 +521,114 @@ def request_completed(self, request_info: RequestInfo):
         :param request_info: Completed request metadata (unused)
         """
         _ = request_info  # request_info unused for async poisson strategy
+
+
+@SchedulingStrategy.register("incremental")
+class AsyncIncrementalStrategy(ThroughputStrategy):
+    """
+    Incremental rate scheduling with gradual load increase over time.
+
+    Schedules requests at ``start_rate + increment_factor * elapsed_seconds``
+    requests per second, holding the rate at ``rate_limit`` once it is reached.
+    With ``initial_burst``, the first ``math.floor(start_rate)`` requests are
+    all scheduled at the start time. Useful for finding saturation points.
+    """
+
+    type_: Literal["incremental"] = "incremental"  # type: ignore[assignment]
+    start_rate: float = Field(
+        description="Initial rate at which to schedule requests in requests/second",
+        gt=0,
+    )
+    increment_factor: float = Field(
+        description="Factor by which to increase the rate over time",
+        gt=0,
+    )
+    rate_limit: int | None = Field(
+        default=None,
+        description="Maximum rate cap after which load remains constant",
+        gt=0,
+    )
+    initial_burst: bool = Field(
+        default=True,
+        description=(
+            "Whether to send initial burst of math.floor(start_rate) requests "
+            "to reach target rate"
+        ),
+    )
+
+    _process_offset: float | None = PrivateAttr(None)
+    _burst_sent: bool = PrivateAttr(False)
+    _burst_remaining: int = PrivateAttr(0)
+
+    def __str__(self) -> str:
+        """
+        :return: String identifier with start rate and increment factor
+        """
+        return f"incremental@{self.start_rate:.2f}+{self.increment_factor:.2f}"
+
+    def init_processes_timings(
+        self,
+        worker_count: int,
+        max_concurrency: int,
+        startup_duration: float,
+    ):
+        """
+        Initialize incremental-specific timing state.
+
+        :param worker_count: Number of worker processes to coordinate
+        :param max_concurrency: Maximum number of concurrent requests allowed
+        :param startup_duration: Duration in seconds for request startup ramping
+        """
+        super().init_processes_timings(worker_count, max_concurrency, startup_duration)
+        with self._processes_lock:
+            self._process_offset = None
+            self._burst_sent = False
+            self._burst_remaining = 0
+
+    async def next_request_time(self, offset: int) -> float:
+        """
+        Calculate next request time with incremental rate increase.
+
+        The target rate is ``start_rate + increment_factor * elapsed_seconds``,
+        capped at ``rate_limit`` when one is set. With ``initial_burst`` enabled,
+        the first ``math.floor(start_rate)`` calls all return the start time.
+
+        :param offset: Unused for incremental strategy
+        :return: Timestamp at which the next request should be dispatched
+        """
+        _ = offset  # offset unused for incremental strategy
+        start_time = await self.get_processes_start_time()
+        if self._process_offset is None:
+            self._process_offset = start_time
+
+        # Arm the burst on the first call; each burst request fires at start_time
+        if self.initial_burst and not self._burst_sent:
+            self._burst_sent = True
+            self._burst_remaining = math.floor(self.start_rate)
+        if self._burst_remaining > 0:
+            self._burst_remaining -= 1
+            return start_time
+
+        # Before the start time nothing is paced yet; hold requests at start_time
+        current_time = time.time()
+        if current_time <= start_time:
+            return start_time
+
+        # Rate grows with wall-clock time elapsed since the shared start time
+        elapsed_time = current_time - start_time
+        next_rate = self.start_rate + (self.increment_factor * elapsed_time)
+        if self.rate_limit is not None:
+            next_rate = min(next_rate, self.rate_limit)
+
+        # Space the next request one interval (1 / rate) after the previous one
+        self._process_offset += 1.0 / next_rate
+
+        return self._process_offset
+
+    def request_completed(self, request_info: RequestInfo):
+        """
+        Handle request completion (no-op for incremental strategy).
+
+        :param request_info: Completed request metadata (unused)
+        """
+        _ = request_info  # request_info unused for async incremental strategy