test: Fix flaky test_autoscales by replacing fixed sleeps with poll-based assertions (#1835)

Lidang-Jiang · vdusek · web-flow · commit 58156ca07e19 · 2026-04-14T12:57:39.000+02:00
Fixes #1655 — `test_autoscales` is flaky on Windows and macOS. The root cause is that the test relies on fixed `asyncio.sleep()` durations to assert autoscaling behavior, but event loop scheduling jitter on Windows/macOS can prevent the autoscaler from completing enough cycles within the expected time window (e.g., `desired_concurrency` only reaches 3 instead of 4).

---------

Co-authored-by: Vlada Dusek <v.dusek96@gmail.com>
diff --git a/tests/unit/_autoscaling/test_autoscaled_pool.py b/tests/unit/_autoscaling/test_autoscaled_pool.py
@@ -4,7 +4,7 @@
 
 import asyncio
 from contextlib import suppress
-from datetime import datetime, timedelta, timezone
+from datetime import timedelta
 from itertools import chain, repeat
 from typing import TYPE_CHECKING, TypeVar, cast
 from unittest.mock import Mock
@@ -15,19 +15,19 @@
 from crawlee._autoscaling._types import LoadRatioInfo, SystemInfo
 from crawlee._types import ConcurrencySettings
 from crawlee._utils.time import measure_time
+from tests.unit.utils import wait_for_condition
 
 if TYPE_CHECKING:
     from collections.abc import Awaitable
 
+T = TypeVar('T')
+
 
 @pytest.fixture
 def system_status() -> SystemStatus | Mock:
     return Mock(spec=SystemStatus)
 
 
-T = TypeVar('T')
-
-
 def future(value: T, /) -> Awaitable[T]:
     f = asyncio.Future[T]()
     f.set_result(value)
@@ -145,10 +145,6 @@ async def run() -> None:
         await pool.run()
 
 
-@pytest.mark.flaky(
-    rerun=3,
-    reason='Test is flaky on Windows and MacOS, see https://github.com/apify/crawlee-python/issues/1655.',
-)
 async def test_autoscales(
     monkeypatch: pytest.MonkeyPatch,
     system_status: SystemStatus | Mock,
@@ -160,7 +156,7 @@ async def run() -> None:
         nonlocal done_count
         done_count += 1
 
-    start = datetime.now(timezone.utc)
+    overload_active = False
 
     def get_historical_system_info() -> SystemInfo:
         result = SystemInfo(
@@ -170,8 +166,7 @@ def get_historical_system_info() -> SystemInfo:
             client_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
         )
 
-        # 0.5 seconds after the start of the test, pretend the CPU became overloaded
-        if result.created_at - start >= timedelta(seconds=0.5):
+        if overload_active:
             result.cpu_info = LoadRatioInfo(limit_ratio=0.9, actual_ratio=1.0)
 
         return result
@@ -196,24 +191,21 @@ def get_historical_system_info() -> SystemInfo:
     pool_run_task = asyncio.create_task(pool.run(), name='pool run task')
 
     try:
-        # After 0.2s, there should be an increase in concurrency
-        await asyncio.sleep(0.2)
-        assert pool.desired_concurrency > 1
+        # Wait until concurrency scales up above 1.
+        await wait_for_condition(lambda: pool.desired_concurrency > 1, timeout=5.0)
 
-        # After 0.5s, the concurrency should reach max concurrency
-        await asyncio.sleep(0.3)
-        assert pool.desired_concurrency == 4
+        # Wait until concurrency reaches maximum.
+        await wait_for_condition(lambda: pool.desired_concurrency == 4, timeout=5.0)
 
-        # The concurrency should guarantee completion of more than 10 tasks (a single worker would complete ~5)
-        assert done_count > 10
+        # Multiple concurrent workers should have completed more tasks than a single worker could.
+        await wait_for_condition(lambda: done_count > 10, timeout=5.0)
 
-        # After 0.7s, the pretend overload should have kicked in and there should be a drop in desired concurrency
-        await asyncio.sleep(0.2)
-        assert pool.desired_concurrency < 4
+        # Simulate CPU overload and wait for the pool to scale down.
+        overload_active = True
+        await wait_for_condition(lambda: pool.desired_concurrency < 4, timeout=5.0)
 
-        # After a full second, the pool should scale down all the way to 1
-        await asyncio.sleep(0.3)
-        assert pool.desired_concurrency == 1
+        # Wait until the pool scales all the way down to minimum.
+        await wait_for_condition(lambda: pool.desired_concurrency == 1, timeout=5.0)
     finally:
         pool_run_task.cancel()
         with suppress(asyncio.CancelledError):
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
@@ -1,5 +1,34 @@
+from __future__ import annotations
+
+import asyncio
 import sys
+from typing import TYPE_CHECKING
 
 import pytest
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
 run_alone_on_mac = pytest.mark.run_alone if sys.platform == 'darwin' else lambda x: x
+
+
+async def wait_for_condition(
+    condition: Callable[[], bool],
+    *,
+    timeout: float = 5.0,
+    poll_interval: float = 0.05,
+) -> None:
+    """Poll `condition` until it returns True, or raise `AssertionError` once `timeout` has elapsed.
+
+    Args:
+        condition: A callable that returns True when the desired state is reached.
+        timeout: Maximum time in seconds to wait; the condition is checked one final time at the deadline.
+        poll_interval: Time in seconds between condition checks.
+    """
+    loop = asyncio.get_running_loop()
+    deadline = loop.time() + timeout
+    # Check before testing the deadline so a state reached during the last sleep (or `timeout=0`) still counts.
+    while not condition():
+        if (remaining := deadline - loop.time()) <= 0:
+            raise AssertionError(f'Condition not met within {timeout}s')
+        await asyncio.sleep(min(poll_interval, remaining))