Skip to content
Draft
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2bb14a0
Backup: WIP CUDA 13 docker build with uv, xformers, and flash-attention
Oct 15, 2025
d64986f
Fix ONNX Runtime build: disable unit tests to avoid C++ ABI compilati…
Oct 15, 2025
ec4028f
Use system Eigen to avoid download failures during ONNX Runtime build
Oct 15, 2025
bd95e07
Add eigen_path parameter for preinstalled eigen
Oct 15, 2025
56b69f7
Upgrade to ONNX Runtime 1.21.0 and remove system Eigen to fix compati…
Oct 15, 2025
ba4ec35
Use ONNX Runtime main branch for latest CUDA 13 and prototype hardwar…
Oct 15, 2025
4877b60
Upgrade to CUDA 13.0.1, PyTorch 2.9.0 with cu130, and fix uv symlink …
Oct 15, 2025
1cdb4fc
Fix CUDA 13 build: add -Wno-psabi, use Ninja generator, proper comput…
Oct 15, 2025
fe86fe7
Add CUDA include path to fix missing cuda/std/utility header
Oct 15, 2025
339c8f1
Fix CUDA fp4 unused parameter warnings by adding -Wno-unused-parameter
Oct 15, 2025
3e2cc5f
Use CPATH env var to ensure CUDA headers are found by all compilers
Oct 15, 2025
d59c27e
Simplify build config: remove CPATH, NHWC ops, use binskim flags like…
Oct 15, 2025
c37b988
Use Release config instead of RelWithDebInfo to avoid NVCC optimizati…
Oct 16, 2025
6962603
Add back enable_cuda_nhwc_ops and NVCC relaxed flags for CUDA compila…
Oct 16, 2025
cc4a37a
Use exact working CUDA 13 build config from GitHub PR, only changing …
Oct 16, 2025
62e6695
Fix CUDA 13 CCCL header paths for CUTLASS compilation
Oct 16, 2025
5416d2f
Install CCCL headers directly from NVIDIA repository
Oct 16, 2025
8df941a
Fix CCCL installation for CUDA 13.0 base image
Oct 16, 2025
d364dbb
Fix CUTLASS cuda/std header dependencies
Oct 16, 2025
ecca41a
Fix PEP 668 externally-managed-environment error
Oct 16, 2025
7f0ca01
Remove invalid --system flag from pip commands
Oct 16, 2025
5f4c976
Add --force-reinstall to pip upgrade command
Oct 16, 2025
e772e3f
Skip pip upgrade to avoid Debian package conflict
Oct 16, 2025
f7717cf
Replace uv pip with standard pip throughout Dockerfile
Oct 16, 2025
b8ae47f
Restore uv in builder stage, keep regular pip in runtime stage
Oct 16, 2025
32b1a25
Remove references to non-existent CLI and SDK wheels
Oct 16, 2025
7fd22d9
Add CLI and SDK wheel building and installation
Oct 16, 2025
f94b5e9
Fix JupyterLab build for DGX Spark (CUDA 13.0.1)
Oct 19, 2025
a1bdd1d
Remove temporary files and build scripts
Oct 19, 2025
2f08f43
Merge branch 'main' into dgx-spark-docker-build
yeldarby Oct 19, 2025
fb415d1
Fix: Restore modal_app.py and revert to original ENTRYPOINT
Oct 19, 2025
e6ba98f
Update docker/dockerfiles/Dockerfile.onnx.gpu
yeldarby Oct 19, 2025
16c5ca7
Update docker/dockerfiles/Dockerfile.onnx.gpu
yeldarby Oct 19, 2025
cf6a32d
Simplify Dockerfile
yeldarby Oct 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 117 additions & 16 deletions docker/dockerfiles/Dockerfile.onnx.gpu
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 as builder
#has python 3.10
FROM nvcr.io/nvidia/cuda:13.0.1-cudnn-devel-ubuntu24.04 as builder

WORKDIR /app

Expand All @@ -14,8 +13,13 @@ RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_
wget \
rustc \
cargo \
curl \
&& rm -rf /var/lib/apt/lists/*

# Install uv using standalone installer (installs to /root/.local/bin)
# INSTALLER_NO_MODIFY_PATH=1 keeps the installer from editing shell rc files;
# the symlink makes uv available on the default PATH for later RUN steps.
# NOTE(review): `curl | sh` fetches the latest uv with no version pin or
# checksum — consider pinning the installer version for reproducible builds.
RUN curl -LsSf https://astral.sh/uv/install.sh | env INSTALLER_NO_MODIFY_PATH=1 sh && \
ln -s /root/.local/bin/uv /usr/local/bin/uv

COPY requirements/requirements.sam.txt \
requirements/requirements.clip.txt \
requirements/requirements.http.txt \
Expand All @@ -30,13 +34,12 @@ COPY requirements/requirements.sam.txt \
requirements/requirements.easyocr.txt \
./

RUN python3 -m pip install -U pip
RUN python3 -m pip install \
# Use uv for much faster package installation (WITHOUT onnxruntime-gpu, we'll build from source)
RUN uv pip install --system --break-system-packages \
-r _requirements.txt \
-r requirements.sam.txt \
-r requirements.clip.txt \
-r requirements.http.txt \
-r requirements.gpu.txt \
-r requirements.gaze.txt \
-r requirements.groundingdino.txt \
-r requirements.doctr.txt \
Expand All @@ -45,22 +48,83 @@ RUN python3 -m pip install \
-r requirements.easyocr.txt \
jupyterlab \
"setuptools<=75.5.0" \
--upgrade \
&& rm -rf ~/.cache/pip
packaging \
numpy \
&& rm -rf ~/.cache/uv

# Install build tools for ONNX Runtime
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
cmake \
ninja-build \
&& rm -rf /var/lib/apt/lists/*
# Fix CUDA 13.0 missing CCCL headers - fetch from NVIDIA repository
# Fix CUDA 13.0 missing CCCL headers - include cuda/std headers
# Vendor CCCL v3.0.0 (libcudacxx + cub + thrust) into the CUDA 13.0 include
# tree so CUTLASS can resolve cuda/std/* headers missing from the base image;
# the clone is removed in the same layer to avoid bloating the image.
# NOTE(review): paths target targets/sbsa-linux (arm64 / DGX Spark image) —
# confirm this if the build is ever run on an x86_64 base.
RUN git clone --depth=1 --branch v3.0.0 https://github.com/NVIDIA/cccl.git /tmp/cccl && \
mkdir -p /usr/local/cuda-13.0/targets/sbsa-linux/include/cccl && \
mkdir -p /usr/local/cuda-13.0/targets/sbsa-linux/include/cuda && \
cp -r /tmp/cccl/libcudacxx/include/* /usr/local/cuda-13.0/targets/sbsa-linux/include/ && \
cp -r /tmp/cccl/cub/cub /usr/local/cuda-13.0/targets/sbsa-linux/include/cccl/ && \
cp -r /tmp/cccl/thrust/thrust /usr/local/cuda-13.0/targets/sbsa-linux/include/cccl/ && \
ln -sf /usr/local/cuda-13.0/targets/sbsa-linux /usr/local/cuda/targets/sbsa-linux && \
rm -rf /tmp/cccl

# Build ONNX Runtime from source for CUDA 13.0 (using main branch for latest CUDA 13 fixes)
# NOTE(review): cloning `main` is unpinned — builds are not reproducible; pin a
# commit SHA or release tag once a known-good revision is identified.
WORKDIR /tmp
RUN git clone --recursive --branch main https://github.com/microsoft/onnxruntime.git /tmp/onnxruntime
WORKDIR /tmp/onnxruntime

# Build ONNX Runtime with CUDA 13 - using exact working config from GitHub PR
# - CMAKE_CUDA_ARCHITECTURES "120/121" targets Blackwell-class GPUs (DGX Spark).
# - onnxruntime_BUILD_UNIT_TESTS=OFF works around C++ ABI compile failures in
#   the unit tests (see commit history).
# NOTE(review): --cudnn_home points at /usr/local/cuda — assumes the
# cudnn-devel base image places cuDNN under the CUDA toolkit root; confirm.
# NOTE(review): --parallel 16 is hardcoded and may exhaust memory on smaller
# build hosts.
RUN ./build.sh \
--config Release \
--build_dir build/cuda13 \
--parallel 16 \
--use_cuda \
--cuda_version 13.0 \
--cuda_home /usr/local/cuda \
--cudnn_home /usr/local/cuda \
--build_wheel \
--build_shared_lib \
--skip_tests \
--cmake_generator Ninja \
--enable_cuda_nhwc_ops \
--use_binskim_compliant_compile_flags \
--allow_running_as_root \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="120-real;121-real;121-virtual" \
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF

# Install the built ONNX Runtime wheel
# (glob because the wheel's version/tag comes from the cloned main branch)
RUN uv pip install --system --break-system-packages /tmp/onnxruntime/build/cuda13/Release/dist/onnxruntime_gpu-*.whl

# Install GPU-enabled PyTorch 2.9.0 with CUDA 13.0 support
# Fix: the uninstall must target the system interpreter like every install in
# this stage; a bare `uv pip uninstall` looks for a virtualenv, fails, and the
# failure was silently masked by `|| true`, leaving the CPU torch from the
# requirements files in place.
RUN uv pip uninstall --system --break-system-packages torch torchvision torchaudio || true && \
    uv pip install --system --break-system-packages torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130

# Remove any existing xformers install and clone fresh for a source build
# Fix: target the system interpreter on uninstall (consistent with the
# `--system --break-system-packages` installs); previously the bare
# `uv pip uninstall` failed against a nonexistent venv and `|| true` hid it.
# NOTE(review): clone is unpinned (default branch) — consider pinning a tag.
RUN uv pip uninstall --system --break-system-packages xformers || true && \
    rm -rf /tmp/xformers && \
    git clone --recursive https://github.com/facebookresearch/xformers.git /tmp/xformers

# Install setup.py requirements for flash_attn
RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip
WORKDIR /tmp/xformers
# Build xformers from source against the cu130 PyTorch installed above;
# --no-build-isolation lets the build import that torch, and MAX_JOBS /
# CMAKE_BUILD_PARALLEL_LEVEL cap compile parallelism to limit memory use.
RUN MAX_JOBS=8 CMAKE_BUILD_PARALLEL_LEVEL=8 uv pip install --system --break-system-packages . --no-build-isolation -v

# Install flash_attn required for Paligemma and Florence2
RUN python3 -m pip install -r requirements.pali.flash_attn.txt --no-dependencies --no-build-isolation && rm -rf ~/.cache/pip
# Cap build parallelism for the FlashAttention compile (its CUDA kernels are
# memory-hungry to build). These ENVs live only in the builder stage — the
# runtime stage starts from a fresh FROM and does not inherit them.
ENV CMAKE_BUILD_PARALLEL_LEVEL=4
ENV MAX_JOBS=4
ENV SETUPTOOLS_BUILD_PARALLEL=1
# Clone and build FlashAttention from source
# NOTE(review): clone is unpinned (default branch) — not reproducible; pin a
# release tag once a known-good version is identified.
WORKDIR /tmp
RUN git clone https://github.com/Dao-AILab/flash-attention.git
WORKDIR /tmp/flash-attention
RUN MAX_JOBS=4 CMAKE_BUILD_PARALLEL_LEVEL=4 uv pip install --system --break-system-packages . --no-build-isolation -v

# Start runtime stage
FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 as runtime
################################################################################
# RUNTIME STAGE
################################################################################
FROM nvcr.io/nvidia/cuda:13.0.1-cudnn-runtime-ubuntu24.04 as runtime

WORKDIR /app

# Copy Python and installed packages from builder
COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/lib/python3.12 /usr/local/lib/python3.12
COPY --from=builder /usr/local/bin /usr/local/bin

# Install runtime dependencies
Expand All @@ -75,15 +139,52 @@ RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_
wget \
rustc \
cargo \
curl \
&& rm -rf /var/lib/apt/lists/*

# uv was already copied from builder stage, no need to reinstall

WORKDIR /build
COPY . .
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN /bin/make create_wheels_for_gpu_notebook
RUN pip3 install --no-cache-dir dist/inference_cli*.whl dist/inference_core*.whl dist/inference_gpu*.whl dist/inference_sdk*.whl "setuptools<=75.5.0"

# Build wheels directly without upgrading pip (Debian-installed pip issue)
# Runs each release setup script to produce the core/gpu/cli/sdk wheels under
# dist/, clearing any stale artifacts first.
# NOTE(review): `twine` and `requests` look release/upload-related — confirm
# the setup scripts actually need them at wheel-build time.
RUN python -m pip install --break-system-packages wheel twine requests && \
rm -f dist/* && \
python .release/pypi/inference.core.setup.py bdist_wheel && \
python .release/pypi/inference.gpu.setup.py bdist_wheel && \
python .release/pypi/inference.cli.setup.py bdist_wheel && \
python .release/pypi/inference.sdk.setup.py bdist_wheel

# First install the GPU wheel with no dependencies to avoid re-installing onnxruntime-gpu
# (the source-built onnxruntime wheel was copied in from the builder's
# site-packages; letting pip resolve deps here would pull the PyPI build).
RUN python -m pip install --break-system-packages --no-deps dist/inference_gpu*.whl

# Then install the rest with dependency resolution enabled
# setuptools is capped to match the constraint used in the builder stage.
RUN python -m pip install --break-system-packages \
dist/inference_core*.whl \
dist/inference_cli*.whl \
dist/inference_sdk*.whl \
"setuptools<=75.5.0"


WORKDIR /notebooks

# Install Node.js 20.x and build JupyterLab assets in runtime container
# The whole toolchain install -> asset build -> toolchain removal happens in a
# single layer so nodejs/npm and the apt lists never persist in the image.
# NOTE(review): keys/repo are fetched unpinned from nodesource at build time —
# an upstream change can break or alter this build.
RUN apt-get update && \
apt-get install -y ca-certificates curl gnupg && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y nodejs && \
node --version && \
jupyter lab build --minimize=False --dev-build=False && \
jupyter lab clean && \
npm cache clean --force && \
apt-get remove -y nodejs gnupg && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/*

COPY examples/notebooks .

WORKDIR /app/
Expand Down