diff --git a/templates/config.json b/templates/config.json index 00ffb149..e0068876 100644 --- a/templates/config.json +++ b/templates/config.json @@ -3748,5 +3748,27 @@ "diskSize": 20 }, "tags": ["LLM Inference & Model Serving", "Developer Tools", "AI Agents"] - } + }, +{ + "id": "deepspeed", + "name": "deepspeedai/DeepSpeed", + "description": "CPU-safe DeepSpeed source/runtime verifier with health, demo, and model-list endpoints. It verifies upstream launcher/runtime/inference/config files without importing DeepSpeed, downloading models, running training, or requiring GPU access.", + "repo": "https://github.com/Phala-Network/phala-cloud/tree/main/templates/prebuilt/deepspeed", + "author": "deepspeedai", + "icon": "deepspeed.svg", + "envs": [ + { + "key": "DEEPSPEED_SOURCE_REF", + "required": false, + "description": "DeepSpeed Git tag, branch, or commit used for CPU-safe source verification.", + "default": "v0.19.1" + } + ], + "defaultResource": { + "vCPU": 1, + "memory": 2048, + "diskSize": 10 + }, + "tags": ["LLM Inference & Model Serving", "Developer Tools", "AI Agents"] +} ] diff --git a/templates/icons/deepspeed.svg b/templates/icons/deepspeed.svg new file mode 100644 index 00000000..77a86ace --- /dev/null +++ b/templates/icons/deepspeed.svg @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/templates/prebuilt/deepspeed/README.md b/templates/prebuilt/deepspeed/README.md new file mode 100644 index 00000000..b6ef31cd --- /dev/null +++ b/templates/prebuilt/deepspeed/README.md @@ -0,0 +1,162 @@ +# deepspeedai/DeepSpeed on Phala Cloud + +Deploy a CPU-safe DeepSpeed source/runtime verifier on Phala Cloud. + +## Overview + +[DeepSpeed](https://github.com/deepspeedai/DeepSpeed) is a GPU-oriented distributed training and inference optimization framework from the DeepSpeed team. 
Full DeepSpeed workloads normally involve PyTorch, accelerator-specific kernels, distributed launchers, CUDA/ROCm or other accelerator backends, model checkpoints, and training or inference jobs that must be sized for the selected hardware. + +This prebuilt template intentionally does not run a full DeepSpeed training or inference server. The default deployment is a minimal HTTP verifier that is safe for a CPU-only Phala Cloud `tdx.small`-style deployment. It downloads selected public DeepSpeed source and documentation files from a pinned upstream Git ref, verifies markers for core concepts, and `py_compile` checks selected Python source files without importing `deepspeed` or `torch`. + +The demo does not download model weights, run distributed training, start inference, require CUDA/GPU access, require provider credentials, require Hugging Face tokens, mount host paths, or use privileged container features. + +## Metadata + +- Template id: `deepspeed` +- Display name: `deepspeedai/DeepSpeed` +- Upstream repository: https://github.com/deepspeedai/DeepSpeed +- Upstream documentation: https://www.deepspeed.ai/ +- Default source ref: `v0.19.1` +- Icon source: upstream DeepSpeed README logo at `docs/assets/images/DeepSpeed_light.svg` +- Upstream author: DeepSpeed Team, via the `deepspeedai/DeepSpeed` GitHub repository +- Phala prebuilt source: https://github.com/Phala-Network/phala-cloud/tree/main/templates/prebuilt/deepspeed + +## What This Template Runs + +The compose file starts one public HTTP service: + +- `app`: A `python:3.12-slim-bookworm` container that runs an inline Python HTTP server on port `8080`. 
+ +On startup, the verifier fetches these upstream files from `deepspeedai/DeepSpeed` at `DEEPSPEED_SOURCE_REF`: + +- `README.md` +- `deepspeed/launcher/runner.py` +- `deepspeed/runtime/engine.py` +- `deepspeed/runtime/config.py` +- `deepspeed/inference/engine.py` +- `docs/_pages/config-json.md` +- `docs/_pages/inference.md` + +The verifier checks for launcher, runtime engine, inference engine, and JSON config documentation markers, then compiles the selected Python source files with `py_compile`. It never imports DeepSpeed at module import time or request time, which avoids Torch/CUDA initialization and extension compilation on small CPU-only CVMs. + +## Deploy + +1. Deploy the `deepspeed` prebuilt template on Phala Cloud. +2. Keep the default CPU-only resource profile for the source verifier. +3. Optionally set `DEEPSPEED_SOURCE_REF` to another public DeepSpeed tag, branch, or commit. +4. Open the generated public endpoint for port `8080`. +5. Visit `https://<your-endpoint>/healthz`. + +The first startup fetches a small set of public source files from GitHub. No private repositories, model registries, paid provider APIs, GPU devices, host bind mounts, Docker socket access, host networking, external build contexts, `env_file`, or privileged mode are required. + +## Environment Variables + +No credentials are required for the default verifier. + +| Variable | Required | Default | Description | +| --- | --- | --- | --- | +| `DEEPSPEED_SOURCE_REF` | No | `v0.19.1` | Public DeepSpeed Git tag, branch, or commit used for source checks. | + +If you adapt this template to run real DeepSpeed training or inference, add only the variables required by your selected model, dataset, storage backend, or provider. For gated Hugging Face models, use a required secret or environment variable such as `HF_TOKEN`; do not hardcode real tokens in `docker-compose.yml` or this README. 
+ +## Usage Endpoints + +The public endpoint exposes JSON on port `8080`: + +- `GET /healthz`: Readiness and verifier status. It always responds with HTTP `200`; `"ok"` is `true` once the source check passes and `false` otherwise, and an `errors` list is included if a ref, marker, or compile check fails. +- `GET /demo`: Detailed verifier output, including fetched files, SHA-256 hashes, marker checks, compile checks, and flags confirming that no model, GPU, training, or provider credentials are used. +- `GET /v1/models`: OpenAI-compatible model-list shape with an empty `data` array because this template does not run an inference server. +- `GET /`: Same basic payload as `/healthz`. + +Example: + +```bash +curl -fsS https://<your-endpoint>/healthz +curl -fsS https://<your-endpoint>/demo +curl -fsS https://<your-endpoint>/v1/models +``` + +Expected `/demo` fields after the source check completes include: + +```json +{ + "ok": true, + "source_check": { + "cpu_only": true, + "deepspeed_imported": false, + "torch_imported": false, + "cuda_required": false, + "distributed_training_started": false, + "model_downloaded": false, + "provider_credentials_required": false + } +} +``` + +The `/v1/models` response intentionally has an empty `data` list: + +```json +{ + "object": "list", + "data": [], + "demo": { + "message": "No DeepSpeed model server is running in this CPU-safe source verifier." 
+ } +} +``` + +## Verification/Smoke Test + +Run from the parent monorepo worktree: + +```bash +python3 templates/validate.py +git diff --check origin/main...HEAD +docker compose -f templates/prebuilt/deepspeed/docker-compose.yml config >/dev/null +``` + +Optional local runtime check from the parent monorepo worktree: + +```bash +docker compose -f templates/prebuilt/deepspeed/docker-compose.yml up -d +curl -fsS http://localhost:8080/healthz +curl -fsS http://localhost:8080/demo +curl -fsS http://localhost:8080/v1/models +docker compose -f templates/prebuilt/deepspeed/docker-compose.yml down +``` + +A healthy verifier returns `"ok": true` after it downloads the selected files, verifies the expected DeepSpeed markers, and compiles the selected Python files. + +## Resource Notes + +The default resource profile is intentionally conservative for a Phala Cloud `tdx.small`-style CPU deployment: + +- 1 vCPU +- 2 GiB memory +- 10 GiB disk + +The default container downloads only selected source files into `/tmp` and does not create named volumes. A real DeepSpeed deployment can require substantially more CPU, memory, disk, network bandwidth, GPUs or other accelerators, PyTorch/CUDA-compatible images, distributed job coordination, model checkpoints, and dataset or object-storage access. + +## Production Extension Notes + +- Replace the verifier with a purpose-built DeepSpeed training, inference, or launcher command only after choosing the model, dataset, checkpoint format, and hardware target. +- Pin DeepSpeed, PyTorch, CUDA/ROCm, base image, and model versions for reproducibility. +- Review upstream DeepSpeed installation guidance before enabling ops or JIT extension compilation. +- Use Phala Cloud secrets or required environment variables for credentials such as `HF_TOKEN`, object storage keys, or provider API keys. Keep placeholder names in examples and never commit real values. 
+- Add authentication before exposing real training controls, model inference, logs, or private metadata. +- Keep host bind mounts, Docker socket mounts, host networking, and privileged mode out of production templates unless there is a reviewed operational reason. + +## Security Notes + +- The default HTTP verifier is unauthenticated and returns source metadata only. +- The compose file uses a public image and inline Compose configs only. +- The compose file does not use host bind mounts, `env_file`, real secrets, privileged mode, host networking, external build contexts, Docker socket access, or GPU device requests. +- `/v1/models` is a compatibility stub, not proof that a model server is running. + +## Upstream Attribution + +DeepSpeed is developed by the DeepSpeed Team in the `deepspeedai/DeepSpeed` repository: https://github.com/deepspeedai/DeepSpeed. + +This Phala Cloud prebuilt template preserves upstream attribution in the template metadata and README while routing deployable assets through the Phala prebuilt template path: https://github.com/Phala-Network/phala-cloud/tree/main/templates/prebuilt/deepspeed. + +The icon saved as `deepspeed.svg` is the upstream DeepSpeed README logo from `docs/assets/images/DeepSpeed_light.svg` in the `deepspeedai/DeepSpeed` repository. 
diff --git a/templates/prebuilt/deepspeed/docker-compose.yml b/templates/prebuilt/deepspeed/docker-compose.yml
new file mode 100644
index 00000000..92ec0e05
--- /dev/null
+++ b/templates/prebuilt/deepspeed/docker-compose.yml
@@ -0,0 +1,344 @@
+services:
+  app:
+    image: python:3.12-slim-bookworm
+    ports:
+      - "8080:8080"
+    environment:
+      DEEPSPEED_SOURCE_REF: ${DEEPSPEED_SOURCE_REF:-v0.19.1}
+      PYTHONUNBUFFERED: "1"
+    command:
+      - python
+      - /server.py
+    configs:
+      - source: server_py
+        target: /server.py
+    # /healthz answers 200 whenever the HTTP server is up, so this probe checks
+    # liveness only; the JSON "ok" field carries the source-check verdict.
+    healthcheck:
+      test:
+        - CMD
+        - python
+        - -c
+        - import urllib.request; urllib.request.urlopen("http://127.0.0.1:8080/healthz", timeout=5).read()
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    restart: unless-stopped
+
+configs:
+  server_py:
+    content: |
+      """CPU-only DeepSpeed source verifier served over HTTP on port 8080.
+
+      A background thread downloads the files listed in SOURCE_FILES from
+      raw.githubusercontent.com at SOURCE_REF, checks each one for its expected
+      marker strings, and byte-compiles the Python files with py_compile.
+      Nothing in this module imports deepspeed or torch.
+      """
+      import hashlib
+      import json
+      import os
+      import platform
+      import py_compile
+      import shutil
+      import sys
+      import threading
+      import time
+      import urllib.parse
+      import urllib.request
+      from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+      from pathlib import Path
+
+      STARTED_AT = time.time()
+      SOURCE_REPO = "deepspeedai/DeepSpeed"
+      UPSTREAM = f"https://github.com/{SOURCE_REPO}"
+      RAW_BASE = f"https://raw.githubusercontent.com/{SOURCE_REPO}"
+      DEFAULT_REF = "v0.19.1"
+      # An unset, empty, or whitespace-only DEEPSPEED_SOURCE_REF falls back to DEFAULT_REF.
+      SOURCE_REF = os.environ.get("DEEPSPEED_SOURCE_REF", DEFAULT_REF).strip() or DEFAULT_REF
+      SOURCE_ROOT = Path("/tmp/deepspeed-source")
+      # Per-file download cap; fetch_file rejects anything larger.
+      MAX_SOURCE_BYTES = 2 * 1024 * 1024
+
+      # Files to fetch. "markers" are substrings that must appear in the file;
+      # "compile" selects the files that are also checked with py_compile.
+      SOURCE_FILES = [
+          {
+              "path": "README.md",
+              "purpose": "Project README and high-level DeepSpeed positioning",
+              "markers": ["Extreme Speed and Scale", "ZeRO", "DeepSpeed"],
+              "compile": False,
+          },
+          {
+              "path": "deepspeed/launcher/runner.py",
+              "purpose": "Distributed launcher front-end source",
+              "markers": ["DeepSpeed runner", "--num_gpus", "PDSHRunner"],
+              "compile": True,
+          },
+          {
+              "path": "deepspeed/runtime/engine.py",
+              "purpose": "DeepSpeed runtime engine source",
+              "markers": ["class DeepSpeedEngine", "def backward", "zero_optimization"],
+              "compile": True,
+          },
+          {
+              "path": "deepspeed/runtime/config.py",
+              "purpose": "DeepSpeed runtime configuration parser source",
+              "markers": ["class DeepSpeedConfig", "train_batch_size", "gradient_accumulation_steps"],
+              "compile": True,
+          },
+          {
+              "path": "deepspeed/inference/engine.py",
+              "purpose": "DeepSpeed inference engine source",
+              "markers": ["class InferenceEngine", "replace_with_kernel_inject", "enable_cuda_graph"],
+              "compile": True,
+          },
+          {
+              "path": "docs/_pages/config-json.md",
+              "purpose": "DeepSpeed JSON configuration documentation",
+              "markers": ["DeepSpeed Configuration JSON", "train_micro_batch_size_per_gpu", "zero_optimization"],
+              "compile": False,
+          },
+          {
+              "path": "docs/_pages/inference.md",
+              "purpose": "DeepSpeed inference documentation",
+              "markers": ["DeepSpeed-Inference", "model parallelism", "inference-customized kernels"],
+              "compile": False,
+          },
+      ]
+
+      # Guards CHECK_STATE, which verify_source replaces once its run has finished.
+      STATE_LOCK = threading.Lock()
+      CHECK_STATE = {
+          "ok": False,
+          "status": "starting",
+          "message": "Source verification has not completed yet.",
+          "ref": SOURCE_REF,
+      }
+
+
+      def source_url(path):
+          """Return the raw.githubusercontent.com URL for path at SOURCE_REF."""
+          # safe="" also percent-encodes "/" inside the ref.
+          encoded_ref = urllib.parse.quote(SOURCE_REF, safe="")
+          encoded_path = urllib.parse.quote(path, safe="/")
+          return f"{RAW_BASE}/{encoded_ref}/{encoded_path}"
+
+
+      def fetch_file(spec):
+          """Download one SOURCE_FILES entry into SOURCE_ROOT and check its markers.
+
+          Returns a dict describing the file (URL, size, SHA-256, per-marker
+          results). Raises RuntimeError if the body exceeds MAX_SOURCE_BYTES;
+          urllib errors propagate to the caller.
+          """
+          path = spec["path"]
+          url = source_url(path)
+          target = SOURCE_ROOT / path
+          target.parent.mkdir(parents=True, exist_ok=True)
+          request = urllib.request.Request(
+              url,
+              headers={"User-Agent": "phala-cloud-deepspeed-template/1.0"},
+          )
+          with urllib.request.urlopen(request, timeout=30) as response:
+              # Read one byte past the cap so an oversized body is detectable.
+              data = response.read(MAX_SOURCE_BYTES + 1)
+          if len(data) > MAX_SOURCE_BYTES:
+              raise RuntimeError(f"{path} exceeds {MAX_SOURCE_BYTES} bytes")
+          target.write_bytes(data)
+          text = data.decode("utf-8", errors="replace")
+          marker_results = {marker: marker in text for marker in spec["markers"]}
+          missing_markers = [marker for marker, present in marker_results.items() if not present]
+          return {
+              "path": path,
+              "purpose": spec["purpose"],
+              "url": url,
+              "bytes": len(data),
+              "sha256": hashlib.sha256(data).hexdigest(),
+              "markers": marker_results,
+              "missing_markers": missing_markers,
+              "compile": spec["compile"],
+          }
+
+
+      def verify_source():
+          """Run the whole source check once and publish the result in CHECK_STATE.
+
+          Per-file failures are collected in state["errors"] so one bad file does
+          not stop the remaining files from being checked.
+          """
+          global CHECK_STATE
+          state = {
+              "ok": False,
+              "status": "checking",
+              "source_repo": SOURCE_REPO,
+              "upstream": UPSTREAM,
+              "ref": SOURCE_REF,
+              "files": [],
+              "compiled": [],
+              "errors": [],
+              "cpu_only": True,
+              "deepspeed_imported": False,
+              "torch_imported": False,
+              "cuda_required": False,
+              "distributed_training_started": False,
+              "model_downloaded": False,
+              "provider_credentials_required": False,
+          }
+
+          try:
+              shutil.rmtree(SOURCE_ROOT, ignore_errors=True)
+              SOURCE_ROOT.mkdir(parents=True, exist_ok=True)
+
+              for spec in SOURCE_FILES:
+                  try:
+                      result = fetch_file(spec)
+                      state["files"].append(result)
+                      if result["missing_markers"]:
+                          state["errors"].append({
+                              "path": spec["path"],
+                              "error": "Expected source markers were not found",
+                              "missing_markers": result["missing_markers"],
+                          })
+                      if spec["compile"]:
+                          py_compile.compile(str(SOURCE_ROOT / spec["path"]), doraise=True)
+                          state["compiled"].append(spec["path"])
+                  except Exception as exc:
+                      state["errors"].append({
+                          "path": spec["path"],
+                          "error": f"{type(exc).__name__}: {exc}",
+                      })
+          except Exception as exc:
+              state["errors"].append({"error": f"{type(exc).__name__}: {exc}"})
+
+          # Pick status and message from the outcome so a failed check never
+          # reports the success text.
+          state["ok"] = not state["errors"]
+          if state["ok"]:
+              state["status"] = "ready"
+              state["message"] = (
+                  "Downloaded selected upstream DeepSpeed source and documentation files, "
+                  "verified launcher/runtime/inference/config markers, and py_compile checked "
+                  "selected Python source files. The demo did not import deepspeed, import torch, "
+                  "download model weights, start distributed training, or request CUDA/GPU access."
+              )
+          else:
+              state["status"] = "source_check_failed"
+              state["message"] = (
+                  "The HTTP verifier is running, but upstream source verification failed. "
+                  "Retry later or set DEEPSPEED_SOURCE_REF to a reachable branch, tag, or commit."
+              )
+
+          with STATE_LOCK:
+              CHECK_STATE = state
+
+
+      def current_state():
+          """Return a deep copy of CHECK_STATE made via a JSON round trip."""
+          with STATE_LOCK:
+              return json.loads(json.dumps(CHECK_STATE))
+
+
+      def base_payload():
+          """Return service metadata shared by the /, /healthz, and /demo payloads."""
+          return {
+              "service": "deepspeed-source-verifier",
+              "display_name": "deepspeedai/DeepSpeed",
+              "upstream": UPSTREAM,
+              "python": sys.version.split()[0],
+              "platform": platform.platform(),
+              "uptime_seconds": round(time.time() - STARTED_AT, 3),
+              "demo_scope": (
+                  "CPU-safe source/runtime verifier. It is not a DeepSpeed training, "
+                  "inference, or distributed launcher server."
+              ),
+          }
+
+
+      class Handler(BaseHTTPRequestHandler):
+          """Serves the verifier's JSON endpoints; only GET is implemented."""
+
+          server_version = "deepspeed-source-verifier/1.0"
+
+          def log_message(self, fmt, *args):
+              """Write request log lines to stdout, flushed immediately."""
+              print("%s - %s" % (self.address_string(), fmt % args), flush=True)
+
+          def respond_json(self, status, payload):
+              """Send payload as a JSON body with the given HTTP status."""
+              body = json.dumps(payload, sort_keys=True).encode("utf-8")
+              self.send_response(status)
+              self.send_header("Content-Type", "application/json")
+              self.send_header("Content-Length", str(len(body)))
+              self.end_headers()
+              self.wfile.write(body)
+
+          def do_GET(self):
+              """Route /, /healthz, /demo and /v1/models; anything else gets 404."""
+              # Query strings and trailing slashes are ignored when routing.
+              path = urllib.parse.urlparse(self.path).path.rstrip("/") or "/"
+              state = current_state()
+
+              if path in ("/", "/healthz"):
+                  payload = base_payload()
+                  payload.update({
+                      "ok": state.get("ok") is True,
+                      "status": state.get("status", "unknown"),
+                      "source_ref": state.get("ref", SOURCE_REF),
+                      "cpu_only": True,
+                      "cuda_required": False,
+                      "model_downloaded": False,
+                      "distributed_training_started": False,
+                      "provider_credentials_required": False,
+                      "deepspeed_imported": False,
+                      "endpoints": ["/healthz", "/demo", "/v1/models"],
+                  })
+                  if state.get("errors"):
+                      payload["errors"] = state["errors"]
+                  # Always 200: the verdict is in "ok", not in the status code.
+                  self.respond_json(200, payload)
+                  return
+
+              if path == "/demo":
+                  payload = base_payload()
+                  payload.update({
+                      "ok": state.get("ok") is True,
+                      "check": (
+                          "Fetch a pinned DeepSpeed ref from GitHub, verify source markers "
+                          "for launcher/runtime/inference/config concepts, and compile selected "
+                          "Python source files without importing DeepSpeed."
+                      ),
+                      "source_check": state,
+                  })
+                  self.respond_json(200, payload)
+                  return
+
+              if path == "/v1/models":
+                  payload = {
+                      "object": "list",
+                      "data": [],
+                      "demo": {
+                          "message": (
+                              "No DeepSpeed model server is running in this CPU-safe source verifier."
+                          ),
+                          "source_ok": state.get("ok") is True,
+                          "source_ref": state.get("ref", SOURCE_REF),
+                          "model_downloaded": False,
+                          "inference_started": False,
+                      },
+                  }
+                  if state.get("errors"):
+                      payload["errors"] = state["errors"]
+                  self.respond_json(200, payload)
+                  return
+
+              self.respond_json(404, {"ok": False, "error": "not found"})
+
+
+      # Verification runs in a daemon thread so the server can answer requests
+      # (reporting "starting") while the downloads are still in progress.
+      threading.Thread(target=verify_source, daemon=True).start()
+      server = ThreadingHTTPServer(("0.0.0.0", 8080), Handler)
+      print("deepspeed source verifier listening on 0.0.0.0:8080", flush=True)
+      server.serve_forever()