mlsys-io · kaiitunnz · Jun 13, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
@@ -95,91 +95,12 @@ jobs:
         run: |
           grep -v '@ git+' src/worker/requirements/requirements.txt > /tmp/requirements-worker-cpu-audit.txt
           uvx pip-audit==2.9.0 --strict \
-            --ignore-vuln GHSA-69w3-r845-3855 \
-            --ignore-vuln GHSA-cfh3-3jmp-rvhc \
-            --ignore-vuln GHSA-whj4-6x5x-4v2j \
-            --ignore-vuln GHSA-wjx4-4jcj-g98j \
-            --ignore-vuln GHSA-5xmw-vc9v-4wf2 \
-            --ignore-vuln GHSA-r73j-pqj5-w3x7 \
-            --ignore-vuln GHSA-pwv6-vv43-88gr \
-            --ignore-vuln GHSA-vfmq-68hx-4jfw \
-            --ignore-vuln GHSA-j7w6-vpvq-j3gm \
-            --ignore-vuln GHSA-98h9-4798-4q5v \
-            --ignore-vuln GHSA-7wx4-6vff-v64p \
             --ignore-vuln GHSA-rrmf-rvhw-rf47 \
-            --ignore-vuln PYSEC-2025-189 \
-            --ignore-vuln PYSEC-2025-190 \
-            --ignore-vuln PYSEC-2025-191 \
-            --ignore-vuln PYSEC-2025-192 \
-            --ignore-vuln PYSEC-2025-193 \
-            --ignore-vuln PYSEC-2025-194 \
-            --ignore-vuln PYSEC-2025-195 \
-            --ignore-vuln PYSEC-2025-196 \
-            --ignore-vuln PYSEC-2025-197 \
-            --ignore-vuln PYSEC-2025-210 \
-            --ignore-vuln PYSEC-2026-139 \
-            --ignore-vuln PYSEC-2025-211 \
-            --ignore-vuln PYSEC-2025-212 \
-            --ignore-vuln PYSEC-2025-213 \
-            --ignore-vuln PYSEC-2025-214 \
-            --ignore-vuln PYSEC-2025-215 \
-            --ignore-vuln PYSEC-2025-216 \
-            --ignore-vuln PYSEC-2025-217 \
-            --ignore-vuln PYSEC-2025-218 \
-            --ignore-vuln PYSEC-2026-97 \
-            --ignore-vuln PYSEC-2025-183 \
-            --ignore-vuln PYSEC-2024-277 \
+            --ignore-vuln PYSEC-2026-87 \
             -r /tmp/requirements-worker-cpu-audit.txt
-      - name: Run pip-audit (worker GPU delta)  # no --strict: flashinfer-jit-cache is unauditable on PyPI
+      - name: Run pip-audit (worker GPU delta)  # no --strict: flashinfer-jit-cache and the vllm +cu129 wheel are unauditable on PyPI
         run: |
           uvx pip-audit==2.9.0 \
-            --ignore-vuln GHSA-69w3-r845-3855 \
-            --ignore-vuln GHSA-cfh3-3jmp-rvhc \
-            --ignore-vuln GHSA-whj4-6x5x-4v2j \
-            --ignore-vuln GHSA-wjx4-4jcj-g98j \
-            --ignore-vuln GHSA-5xmw-vc9v-4wf2 \
-            --ignore-vuln GHSA-r73j-pqj5-w3x7 \
-            --ignore-vuln GHSA-pwv6-vv43-88gr \
-            --ignore-vuln GHSA-vfmq-68hx-4jfw \
-            --ignore-vuln GHSA-pf3h-qjgv-vcpr \
-            --ignore-vuln GHSA-pq5c-rjhq-qp7p \
-            --ignore-vuln GHSA-3mwp-wvh9-7528 \
-            --ignore-vuln GHSA-hpv8-x276-m59f \
-            --ignore-vuln GHSA-39mp-8hj3-5c49 \
-            --ignore-vuln GHSA-h3h8-3v2v-rg7m \
-            --ignore-vuln GHSA-jmh7-g254-2cq9 \
-            --ignore-vuln GHSA-pfjf-5gxr-995x \
-            --ignore-vuln GHSA-w8v5-vhqr-4h9v \
-            --ignore-vuln GHSA-x368-4g9h-fvv4 \
-            --ignore-vuln GHSA-83vm-p52w-f9pw \
-            --ignore-vuln GHSA-3ww4-5jv9-j5gm \
-            --ignore-vuln GHSA-j7w6-vpvq-j3gm \
-            --ignore-vuln GHSA-98h9-4798-4q5v \
-            --ignore-vuln GHSA-7wx4-6vff-v64p \
             --ignore-vuln GHSA-rrmf-rvhw-rf47 \
-            --ignore-vuln PYSEC-2025-189 \
-            --ignore-vuln PYSEC-2025-190 \
-            --ignore-vuln PYSEC-2025-191 \
-            --ignore-vuln PYSEC-2025-192 \
-            --ignore-vuln PYSEC-2025-193 \
-            --ignore-vuln PYSEC-2025-194 \
-            --ignore-vuln PYSEC-2025-195 \
-            --ignore-vuln PYSEC-2025-196 \
-            --ignore-vuln PYSEC-2025-197 \
-            --ignore-vuln PYSEC-2025-210 \
-            --ignore-vuln PYSEC-2026-139 \
-            --ignore-vuln PYSEC-2025-211 \
-            --ignore-vuln PYSEC-2025-212 \
-            --ignore-vuln PYSEC-2025-213 \
-            --ignore-vuln PYSEC-2025-214 \
-            --ignore-vuln PYSEC-2025-215 \
-            --ignore-vuln PYSEC-2025-216 \
-            --ignore-vuln PYSEC-2025-217 \
-            --ignore-vuln PYSEC-2025-218 \
-            --ignore-vuln PYSEC-2026-97 \
-            --ignore-vuln PYSEC-2025-183 \
-            --ignore-vuln PYSEC-2024-277 \
-            --ignore-vuln PYSEC-2025-222 \
-            --ignore-vuln PYSEC-2024-274 \
-            --ignore-vuln PYSEC-2026-161 \
+            --ignore-vuln GHSA-w8v5-vhqr-4h9v \
             -r src/worker/requirements/requirements.gpu.txt
@@ -94,58 +94,24 @@ upgrade-blocker. The currently-ignored advisories and the upgrade
 blocker that justifies each are listed below; the same list is encoded
 as `--ignore-vuln` flags in `.github/workflows/security.yml`.
 
+The worker GPU `vllm` is pinned to the `+cu129` release wheel (the PyPI
+wheel is built for CUDA 13, incompatible with the CUDA 12.9 worker). Its
+local version is not on PyPI, so pip-audit skips it — like
+`flashinfer-jit-cache` — which is why the GPU run omits `--strict`. The wheel's
+CVE exposure matches PyPI `vllm 0.22.0`, so check vLLM advisories against that.
+
 | Advisory | Package | Fix version | Why ignored |
 |----------|---------|-------------|-------------|
-| GHSA-69w3-r845-3855 | transformers | 5.0.0rc3 | held by vllm/vllm-omni 0.18 compatibility |
-| GHSA-pf3h-qjgv-vcpr | vllm | 0.19.0 | held by transformers 4.57 + adjacent inference deps |
-| GHSA-pq5c-rjhq-qp7p | vllm | 0.19.0 | same |
-| GHSA-3mwp-wvh9-7528 | vllm | 0.19.0 | same |
-| GHSA-hpv8-x276-m59f | vllm | 0.20.0 | same |
-| GHSA-x368-4g9h-fvv4 | vllm | 0.19.1 | same |
-| GHSA-83vm-p52w-f9pw | vllm | 0.20.0 | same |
-| GHSA-3ww4-5jv9-j5gm | vllm | 0.22.0 | same |
-| GHSA-cfh3-3jmp-rvhc | pillow | 12.1.1 | gradio 5.50 caps pillow<12 (transitive via vllm-omni) |
-| GHSA-whj4-6x5x-4v2j | pillow | 12.2.0 | same cap |
-| GHSA-wjx4-4jcj-g98j | pillow | 12.2.0 | same cap |
-| GHSA-5xmw-vc9v-4wf2 | pillow | 12.2.0 | same cap |
-| GHSA-r73j-pqj5-w3x7 | pillow | 12.2.0 | same cap |
-| GHSA-pwv6-vv43-88gr | pillow | 12.2.0 | same cap |
-| GHSA-vfmq-68hx-4jfw | lxml | 6.1.0 | crawl4ai 0.8.6 caps lxml<6 |
-| GHSA-39mp-8hj3-5c49 | gradio | 6.7.0 | vllm-omni 0.18 pins gradio==5.50 |
-| GHSA-h3h8-3v2v-rg7m | gradio | 6.6.0 | same pin |
-| GHSA-jmh7-g254-2cq9 | gradio | 6.6.0 | same pin |
-| GHSA-pfjf-5gxr-995x | gradio | 6.6.0 | same pin |
+| GHSA-rrmf-rvhw-rf47 | torch | (none) | no fix version published |
+| PYSEC-2026-87 | lxml | 6.1.0 | crawl4ai 0.8.6 caps lxml<6 |
 | GHSA-w8v5-vhqr-4h9v | diskcache | (none) | upstream unmaintained, no fixed version published |
-| GHSA-j7w6-vpvq-j3gm | diffusers | 0.38.0 | fix requires safetensors>=0.8.0rc0 pre-release; uv lock won't pick up pre-releases without explicit opt-in |
-| GHSA-98h9-4798-4q5v | diffusers | 0.38.0 | same blocker as GHSA-j7w6-vpvq-j3gm — both fixed in 0.38.0 |
-| GHSA-7wx4-6vff-v64p | diffusers | 0.38.0 | same blocker as GHSA-j7w6-vpvq-j3gm — fixed in 0.38.0 |
-| PYSEC-2025-189 | torch | (none) | no fix version published |
-| PYSEC-2025-190 | torch | (none) | same |
-| PYSEC-2025-191 | torch | (none) | same |
-| PYSEC-2025-192 | torch | (none) | same |
-| PYSEC-2025-193 | torch | (none) | same |
-| PYSEC-2025-194 | torch | (none) | same |
-| PYSEC-2025-195 | torch | (none) | same |
-| PYSEC-2025-196 | torch | (none) | same |
-| PYSEC-2025-197 | torch | (none) | same |
-| PYSEC-2025-210 | torch | (none) | same |
-| PYSEC-2026-139 | torch | (none) | same |
-| GHSA-rrmf-rvhw-rf47 | torch | (none) | same |
-| PYSEC-2025-211 | transformers | (none) | no fix version published; transformers also held by vllm-omni 0.18 |
-| PYSEC-2025-212 | transformers | (none) | same |
-| PYSEC-2025-213 | transformers | (none) | same |
-| PYSEC-2025-214 | transformers | (none) | same |
-| PYSEC-2025-215 | transformers | (none) | same |
-| PYSEC-2025-216 | transformers | (none) | same |
-| PYSEC-2025-217 | transformers | (none) | same |
-| PYSEC-2025-218 | transformers | (none) | same |
-| PYSEC-2026-97 | nltk | (none) | no fix version published |
-| PYSEC-2025-183 | pyjwt | (none) | no fix version published |
-| PYSEC-2024-277 | joblib | (none) | no fix version published |
-| PYSEC-2025-222 | vllm | (none) | no fix version published; held by vllm-omni 0.18 pin |
-| PYSEC-2024-274 | gradio | (none) | no fix version published; vllm-omni 0.18 pins gradio==5.50 |
-| PYSEC-2026-161 | starlette | 1.0.1 | gradio 5.50 caps starlette<1.0 (transitive via vllm-omni 0.18) |
-
-When a blocker lifts (e.g. transformers 5 ↔ vllm 0.19 line stabilizes),
-drop the corresponding `--ignore-vuln` flag from the workflow and the
-row from this table — don't extend the rationale to unrelated packages.
+
+The worker GPU audit ignores `GHSA-rrmf-rvhw-rf47` and
+`GHSA-w8v5-vhqr-4h9v`; the worker CPU audit ignores
+`GHSA-rrmf-rvhw-rf47` and `PYSEC-2026-87`; the server audit ignores
+nothing.
+
+When a blocker lifts (e.g. crawl4ai unpins lxml, or a fixed torch
+release ships), drop the corresponding `--ignore-vuln` flag from the
+workflow and the row from this table — don't extend the rationale to
+unrelated packages.
@@ -54,27 +54,27 @@ runtime-worker-core = [
 runtime-inference = [
     "accelerate>=1.12.0",
     "datasets>=4.3.0",
-    "diffusers>=0.36.0",
-    "pillow>=11.3.0",  # held <12 by gradio 5.50 (transitive via vllm-omni)
-    "torch>=2.10.0",
-    "transformers>=4.57.6",  # held at 4.57 by vllm/vllm-omni 0.18 compatibility
+    "diffusers>=0.38.0",
+    "pillow>=12.2.0",
+    "torch>=2.11.0",
+    "transformers>=5.5.1,<5.9.0",
 ]
 runtime-training = [
-    "peft>=0.17.1",
+    "peft>=0.18.0",
     "trl>=0.23.0",
 ]
 runtime-rag = [
-    "fastembed>=0.7.3,<0.8",
+    "fastembed>=0.8.0",
     "qdrant-client>=1.15.1",
 ]
 runtime-inference-gpu = [
     "bitsandbytes>=0.49.2",
-    "flashinfer-python>=0.6.6",
-    "vllm==0.18.0",  # pinned with transformers 4.57; 0.19+ needs transformers 5.x
-    "vllm-omni==0.18.0",  # follows vllm pin
+    "flashinfer-python==0.6.11.post2",
+    "vllm==0.22.0",
+    "vllm-omni==0.22.0",
 ]
 runtime-training-gpu = [
-    "deepspeed>=0.18.1",
+    "deepspeed>=0.19.1",
 ]
 runtime-agent = [
     "aiofiles>=24.1.0",
@@ -240,6 +240,10 @@ flowmesh-cli-stack = { workspace = true }
 flowmesh-hook = { workspace = true }
 flowmesh-sdk = { workspace = true }
 flowmesh-sdk-stack = { workspace = true }
+# vLLM's PyPI wheel is built for CUDA 13 (libcudart.so.13); the GPU worker runs
+# CUDA 12.9. Pin the cu129 release wheel for linux/x86_64 so it matches torch
+# (UV_TORCH_BACKEND=cu129) and flashinfer; other platforms fall back to PyPI.
+vllm = { url = "https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_x86_64.whl", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }
 
 [tool.uv.workspace]
 members = ["cli", "cli/stack", "hook", "sdk", "sdk/stack"]
@@ -7,6 +7,7 @@
 """
 
 import gc
+import inspect
 import logging
 import os
 from pathlib import Path
@@ -176,6 +177,13 @@ def _encode_prompt_compat(
                 kwargs["negative_prompt_2"] = negative_prompt
                 kwargs["negative_prompt_3"] = negative_prompt
 
+        # SD1.x/2.x/XL encoders require do_classifier_free_guidance (no default in
+        # diffusers >=0.38); request it so negative embeddings are returned. SD3
+        # has no such parameter, so only pass it when the signature accepts it.
+        encode_params = inspect.signature(self._pipe.encode_prompt).parameters
+        if "do_classifier_free_guidance" in encode_params:
+            kwargs["do_classifier_free_guidance"] = True
+
         return self._pipe.encode_prompt(**kwargs)
 
     def _encode_and_combine_prompts(

@@ -168,8 +168,9 @@ def compute_reward_from_tokens(
                 response_tokens,
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=True,
-            ).strip()
-            response_texts.append(text)
+            )
+            assert isinstance(text, str)
+            response_texts.append(text.strip())
 
         reward_scores = self._score_texts(response_texts).to(device)
 
@@ -1202,7 +1203,8 @@ def _build_ppo_config(
             response_length=response_length,
             save_strategy=save_strategy,
             remove_unused_columns=False,
-            save_safetensors=bool(training_config.get("save_safetensors", False)),
+            fp16=bool(training_config.get("fp16", False)),
+            bf16=bool(training_config.get("bf16", False)),
             **ppo_ctor_kwargs,
         )
 

@@ -476,10 +476,7 @@ def run(self, task: ExecutorTask, out_dir: Path) -> SFTResult:
             )
 
             # Drop heavy references before runner-level cleanup
-            trainer = None
-            model = None  # type: ignore[assignment]
-            tokenizer = None
-            train_dataset = None
+            del trainer, model, tokenizer, train_dataset
             self._current_trainer = None
             self._current_model = None
             self._current_tokenizer = None

@@ -568,6 +568,7 @@ def _run_inner(
             raw_text = self._tok.decode(
                 gen_part, skip_special_tokens=skip_special_tokens
             )
+            assert isinstance(raw_text, str)
             text = raw_text
 
             # Apply simple stop-string truncation on decoded text

@@ -2,9 +2,10 @@
 # Regenerate with: uv run scripts/dev/sync_requirements.py --write
 --extra-index-url https://flashinfer.ai/whl/cu129
 bitsandbytes==0.49.2
-deepspeed==0.18.1
-flashinfer-cubin==0.6.6
-flashinfer-jit-cache==0.6.6
-flashinfer-python==0.6.6
-vllm-omni==0.18.0
-vllm==0.18.0
+deepspeed==0.19.1
+flashinfer-cubin==0.6.11.post2
+flashinfer-jit-cache==0.6.11.post2
+flashinfer-python==0.6.11.post2
+vllm @ https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_x86_64.whl ; platform_machine == 'x86_64' and sys_platform == 'linux'
+vllm-omni==0.22.0
+vllm==0.22.0 ; platform_machine != 'x86_64' or sys_platform != 'linux'
@@ -15,9 +15,9 @@ colorlog==6.9.0
 crawl4ai==0.8.6
 datasets==4.3.0
 ddgs==9.10.0
-diffusers==0.36.0
+diffusers==0.38.0
 docker==7.1.0
-fastembed==0.7.3
+fastembed==0.8.0
 google-genai==1.47.0
 grpcio==1.76.0
 httpx==0.28.1
@@ -38,9 +38,9 @@ opentelemetry-instrumentation==0.59b0
 opentelemetry-sdk==1.38.0
 opentelemetry-semantic-conventions==0.59b0
 pandas==2.3.3
-peft==0.17.1
+peft==0.19.1
 pexpect==4.9.0
-pillow==11.3.0
+pillow==12.2.0
 prompt-toolkit==3.0.52
 protobuf==5.29.6
 psycopg[binary]==3.2.12
@@ -52,8 +52,8 @@ sqlalchemy==2.0.44
 sqlmodel==0.0.27
 tiktoken==0.12.0
 toml==0.10.2
-torch==2.10.0
-transformers==4.57.6
+torch==2.11.0
+transformers==5.8.1
 trl==0.23.0
 websocket-client==1.9.0
 wikipedia-api==0.8.1
@@ -1,11 +1,18 @@
 """Pytest configuration for tests directory."""
 
 import logging
+import os
 import sys
 import types
 
 import pytest
 
+# Run the unit suite CPU-only by default. transformers eagerly initializes the
+# CUDA device when a TrainingArguments-derived config is constructed, so config
+# tests would otherwise crash on a host whose driver can't init the installed
+# torch build. Set CUDA_VISIBLE_DEVICES explicitly to run GPU-marked tests.
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
+
 # The ``vastai`` SDK makes a network call at import time.  A minimal stub
 # is inserted before importing any server modules so that tests never
 # trigger real network traffic.