|
#!/bin/bash

# Build PyTorch's CUDA targets against a custom CCCL checkout.
# Fail fast: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

readonly pytorch_repo=https://github.com/pytorch/pytorch.git
readonly pytorch_branch=main

# Ensure the script is being executed in the root cccl directory
# (this script lives two levels below the repo root):
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "${script_dir}/../.."
readonly cccl_repo="${PWD}"
| 11 | + |
# Print each named variable as "name=value", one per line.
# Arguments: variable names to dump (resolved via indirect expansion).
log_vars() {
  local name
  for name in "$@"; do
    printf '%s=%s\n' "${name}" "${!name}"
  done
}
| 17 | + |
# Define CCCL_TAG to override the default CCCL SHA. Otherwise the current HEAD of the local checkout is used.
echo "CCCL_TAG (override): ${CCCL_TAG-}";
if test -n "${CCCL_TAG-}"; then
  # If CCCL_TAG is defined, fetch it to the local checkout.
  # rev-parse FETCH_HEAD must run immediately after the fetch: it resolves
  # the ref (tag/branch/SHA) that the fetch just wrote.
  git -C "${cccl_repo}" fetch origin "${CCCL_TAG}";
  cccl_sha="$(git -C "${cccl_repo}" rev-parse FETCH_HEAD)";
else
  # No override: pin to whatever the local checkout currently points at.
  cccl_sha="$(git -C "${cccl_repo}" rev-parse HEAD)";
fi

# Scratch directory for all clones/builds; CCCL_BUILD_INFIX (optional)
# namespaces it, e.g. per-devcontainer build trees.
readonly workdir="${cccl_repo}/build/${CCCL_BUILD_INFIX:-}/pytorch"

# Echo the effective configuration for the CI log:
log_vars \
  pytorch_repo pytorch_branch \
  cccl_repo cccl_sha \
  workdir
| 34 | + |
mkdir -p "${workdir}"
cd "${workdir}"
echo "Working in ${workdir}"

# Clone the *local* cccl repo into the workdir (cheap: local object sharing)
# and pin it to the SHA resolved above. ::group:: markers fold CI log output.
echo "::group::Cloning CCCL..."
rm -rf cccl
git clone "${cccl_repo}"
git -C cccl checkout "${cccl_sha}"
echo "CCCL HEAD:"
git -C cccl log -1 --format=short
echo "::endgroup::"
| 46 | + |
# Setup a CUDA environment with the requested CCCL.
# Use a local directory to avoid modifying the actual CUDA install:
echo "::group::Setting up clone of CUDA environment with custom CCCL..."
(
  set -x
  rm -rf ./cuda
  # -H dereferences the /usr/local/cuda symlink so the real toolkit is copied:
  cp -Hr /usr/local/cuda ./cuda
  # Swap the toolkit's bundled CCCL headers for the ones from our checkout:
  rm -rf ./cuda/include/cccl/*
  cccl/ci/install_cccl.sh ./cccl-install > /dev/null
  cp -r ./cccl-install/include/* ./cuda/include/cccl
)
# Point all CUDA discovery mechanisms at the cloned toolkit:
export PATH="$PWD/cuda/bin:$PATH"
export CUDA_HOME="$PWD/cuda"
export CUDA_PATH="$PWD/cuda"
# Sanity check: nvcc must resolve from the cloned toolkit now at the front
# of PATH. 'command -v' is the builtin, portable replacement for 'which'.
command -v nvcc
nvcc --version
echo "::endgroup::"
| 64 | + |
# Shallow-clone PyTorch with submodules at the pinned branch.
echo "::group::Cloning PyTorch..."
rm -rf pytorch
# Quote expansions so the command is robust even if the repo/branch values
# ever contain shell-special characters (SC2086):
git clone "${pytorch_repo}" -b "${pytorch_branch}" --recursive --depth 1
echo "PyTorch HEAD:"
git -C pytorch log -1 --format=short
echo "::endgroup::"

echo "::group::Installing PyTorch build dependencies..."
pytorch_root="$PWD/pytorch"
# PyTorch's CMake config imports helper modules from the source tree:
export PYTHONPATH="${pytorch_root}:${pytorch_root}/tools:${PYTHONPATH:-}"
pip install -r "${pytorch_root}/requirements-build.txt"
echo "::endgroup::"
| 77 | + |
echo "::group::Configuring PyTorch..."
rm -rf build
mkdir build
declare -a cmake_args=(
  "-DUSE_NCCL=OFF"
  # Need to define this explicitly, torch's FindCUDA logic adds ancient arches if left undefined:
  "-DTORCH_CUDA_ARCH_LIST=7.5;8.0;9.0;10.0;12.0"
)
cmake -S ./pytorch -B ./build -G Ninja "${cmake_args[@]}"
echo "::endgroup::"

# Verify that the configured build is using the custom CUDA dir for CTK and nvcc.
# Use grep -F so the interpolated $PWD is matched literally; otherwise regex
# metacharacters in the path (e.g. '.', '+') could produce false positives.
if ! grep -qF "CUDA_TOOLKIT_ROOT_DIR:PATH=$PWD/cuda" ./build/CMakeCache.txt; then
  echo "Error: CUDA_TOOLKIT_ROOT_DIR does not point to the custom CUDA" >&2
  exit 1
fi
if ! grep -qF "CUDA_NVCC_EXECUTABLE:FILEPATH=$PWD/cuda/bin/nvcc" ./build/CMakeCache.txt; then
  echo "Error: CUDA_NVCC_EXECUTABLE does not point to the custom CUDA" >&2
  exit 1
fi
| 98 | + |
# This builds a bunch of unnecessary targets. Leaving here to use as a fallback if the
# ninja target extraction below starts failing:
# echo "::group::Building torch_cuda target..."
# cmake --build ./build/ --target torch_cuda
# echo "::endgroup::"

# This cuts the number of built targets roughly in half:
echo "::group::Extracting cuda targets from build.ninja..."
# Query ninja for all object files built from CUDA source files in ATen/native/cuda/
# that are part of the torch_cuda library:
ninja -C ./build -t query lib/libtorch_cuda.so |
  grep -E "ATen/native/cuda/.*\.cu\.o$" |
  sort -u | tee build/cuda_targets.txt
# At the time this script was written, there were 217 cuda targets.
# Check that there are at least 100 detected targets, otherwise fail.
num_targets=$(wc -l < build/cuda_targets.txt)
if test "$num_targets" -lt 100; then
  echo "Error: extracted cuda targets count is less than 100! ($num_targets)" >&2
  echo "This likely indicates a failure to extract the targets from ninja." >&2
  exit 1
fi
echo "::endgroup::"
| 121 | + |
echo "::group::Building pytorch CUDA targets with custom CCCL..."
# Feed the target list to ninja via xargs directly. The previous form,
#   ninja -C ./build $(xargs -a build/cuda_targets.txt)
# relied on unquoted word-splitting of a command substitution and could
# overflow ARG_MAX for very large target lists; xargs handles batching.
xargs -a build/cuda_targets.txt ninja -C ./build
echo "::endgroup::"

echo "PyTorch CUDA targets built successfully with custom CCCL."