From 29f045d596893419e2e2db408e04d4c19cceaaa2 Mon Sep 17 00:00:00 2001 From: tsai Date: Thu, 22 Dec 2022 13:45:51 +0800 Subject: [PATCH 1/5] apply --- .github/workflows/build-oneflow.yml | 135 ++++++++++++++++++ .gitignore | 1 + build.sh | 34 ++--- docker/Dockerfile | 9 ++ .../build_scripts/install-runtime-packages.sh | 8 ++ 5 files changed, 164 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/build-oneflow.yml diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml new file mode 100644 index 000000000..6f9598de1 --- /dev/null +++ b/.github/workflows/build-oneflow.yml @@ -0,0 +1,135 @@ +name: Build (OneFlow) + +on: + workflow_dispatch: + inputs: + useCache: + description: Use GHA cache + type: boolean + required: false + default: true + push: + branches-ignore: + - "update-dependencies-pr" + paths: + - ".github/workflows/build.yml" + - "docker/**" + - "*.sh" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +env: + REGION_ID: cn-beijing + ACR_REGISTRY: registry.cn-beijing.aliyuncs.com + ACR_NAMESPACE: oneflow + DOCKER_HUB_NAMESPACE: oneflowinc + +jobs: + build_manylinux: + name: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }} + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + tag-suffix: ["cuda11.2", "cuda10.2"] + include: + - tag-suffix: "cuda11.8" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" + - tag-suffix: "cuda11.6" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.6.2-cudnn8-devel-centos7" + - tag-suffix: "cuda11.5" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.5.1-cudnn8-devel-centos7" + - tag-suffix: "cuda11.4" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.4.3-cudnn8-devel-centos7" + - tag-suffix: "cuda11.3" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.3.1-cudnn8-devel-centos7" + - tag-suffix: "cuda11.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.2.2-cudnn8-devel-centos7" + - tag-suffix: "cuda11.0" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.0.3-cudnn8-devel-centos7" + target: "manylinux" + - tag-suffix: "cuda10.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:10.2-cudnn7-devel-centos7" + - tag-suffix: "cpu" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "" + + env: + POLICY: ${{ matrix.policy }} + PLATFORM: ${{ matrix.platform }} + COMMIT_SHA: ${{ github.sha }} + DOCKER_REPO: "${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}" + TEST_TAG: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}:${{ github.sha }} + CUDA_BASE_IMAGE: ${{ matrix.CUDA_BASE_IMAGE }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + # 1.1 Login to ACR + - name: Login to ACR with the AccessKey pair + uses: aliyun/acr-login@v1 + with: + login-server: https://registry.${{env.REGION_ID}}.aliyuncs.com + username: "${{ secrets.ACR_USERNAME }}" + password: "${{ secrets.ACR_PASSWORD }}" + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up emulation + if: matrix.platform != 'i686' && matrix.platform != 'x86_64' + uses: docker/setup-qemu-action@v2 + with: + platforms: ${{ matrix.platform }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build + run: ./build.sh + - name: Build and push + uses: docker/build-push-action@v2 + with: + push: true + tags: | + ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }} + ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }} + cache-from: type=registry,ref=${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + cache-to: type=inline + context: ./docker/ + build-args: | + POLICY + PLATFORM + BASEIMAGE + DEVTOOLSET_ROOTPATH + PREPEND_PATH + LD_LIBRARY_PATH_ARG + + all_passed: + needs: [build_manylinux] + runs-on: ubuntu-latest + steps: + - run: echo "All jobs passed" diff --git a/.gitignore b/.gitignore index a6a86cf99..dc2ec39a5 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ docker/sources # buildx cache .buildx-cache-*/ +.vscode/ diff --git a/build.sh b/build.sh index 9234d4582..029c756a8 100755 --- a/build.sh +++ b/build.sh @@ -32,7 +32,8 @@ if [ "${POLICY}" == "manylinux2014" ]; then if [ "${PLATFORM}" == "s390x" ]; then BASEIMAGE="s390x/clefos:7" else - BASEIMAGE="${MULTIARCH_PREFIX}centos:7" + DEFAULT_BASEIMAGE="${MULTIARCH_PREFIX}centos:7" + BASEIMAGE="${CUDA_BASE_IMAGE:-${DEFAULT_BASEIMAGE}}" fi DEVTOOLSET_ROOTPATH="/opt/rh/devtoolset-10/root" PREPEND_PATH="${DEVTOOLSET_ROOTPATH}/usr/bin:" @@ -84,31 +85,18 @@ fi if [ "${MANYLINUX_BUILD_FRONTEND}" == "docker" ]; then docker build ${BUILD_ARGS_COMMON} elif [ "${MANYLINUX_BUILD_FRONTEND}" == "docker-buildx" ]; then - docker buildx build \ - --load \ - --cache-from=type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \ - --cache-to=type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \ - ${BUILD_ARGS_COMMON} + env elif [ "${MANYLINUX_BUILD_FRONTEND}" == "buildkit" ]; then - buildctl build \ - --frontend=dockerfile.v0 \ - --local context=./docker/ \ - --local dockerfile=./docker/ \ - --import-cache type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \ - --export-cache type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \ - --opt build-arg:POLICY=${POLICY} --opt build-arg:PLATFORM=${PLATFORM} --opt build-arg:BASEIMAGE=${BASEIMAGE} \ - --opt "build-arg:DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" --opt "build-arg:PREPEND_PATH=${PREPEND_PATH}" --opt "build-arg:LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" \ - --output type=docker,name=quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} | docker load + echo "Unsupported build frontend: buildkit" + exit 1 else echo "Unsupported build frontend: '${MANYLINUX_BUILD_FRONTEND}'" exit 1 fi -docker run --rm -v $(pwd)/tests:/tests:ro quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} /tests/run_tests.sh - -if [ "${MANYLINUX_BUILD_FRONTEND}" != "docker" ]; then - if [ -d $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} ]; then - rm -rf $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} - fi - mv $(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} -fi +echo "POLICY=${POLICY}" >> $GITHUB_ENV +echo "PLATFORM=${PLATFORM}" >> $GITHUB_ENV +echo "BASEIMAGE=${BASEIMAGE}" >> $GITHUB_ENV +echo "DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" >> $GITHUB_ENV +echo "PREPEND_PATH=${PREPEND_PATH}" >> $GITHUB_ENV +echo "LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" >> $GITHUB_ENV diff --git a/docker/Dockerfile b/docker/Dockerfile index 693ed64ea..2e7678a3c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -171,4 +171,13 @@ RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts ENV SSL_CERT_FILE=/opt/_internal/certs.pem +ARG BAZEL_URL="https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64" +RUN curl -L $BAZEL_URL -o /usr/local/bin/bazel \ + && chmod +x /usr/local/bin/bazel +RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static devtoolset-7-gcc* vim ccache htop fuse-devel +RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel + +RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clangd && \ + chmod +x /usr/local/bin/clangd + CMD ["/bin/bash"] diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh index fac26640f..9985a2699 100755 --- a/docker/build_scripts/install-runtime-packages.sh +++ b/docker/build_scripts/install-runtime-packages.sh @@ -34,6 +34,14 @@ source $MY_DIR/build_utils.sh # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install) if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel" + yum -y install yum-versionlock + yum versionlock add libcudnn8-devel + yum versionlock add libcudnn8 + yum versionlock add cuda-* + yum versionlock add libnccl + yum versionlock add libnccl-devel + yum versionlock list libcudnn8-devel + yum versionlock list libcudnn8 elif [ "${AUDITWHEEL_POLICY}" == "manylinux_2_24" ]; then MANYLINUX_DEPS="libc6-dev libstdc++-6-dev libglib2.0-dev libx11-dev libxext-dev libxrender-dev libgl1-mesa-dev libice-dev libsm-dev libz-dev libexpat1-dev" elif [ "${AUDITWHEEL_POLICY}" == "musllinux_1_1" ]; then From 0c557758d35121efb5a5c3af5eecfe5de715d40c Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 25 Oct 2023 15:50:10 +0800 Subject: [PATCH 2/5] add cu12 --- .github/workflows/build-oneflow.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 6f9598de1..021b1dbcf 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -35,6 +35,18 @@ jobs: matrix: tag-suffix: ["cuda11.2", "cuda10.2"] include: + - tag-suffix: "cuda12.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7" + - tag-suffix: "cuda12.1" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" + - tag-suffix: "cuda12.0" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.0.1-cudnn8-devel-centos7" - tag-suffix: "cuda11.8" policy: "manylinux2014" platform: "x86_64" From 4c8c8545bf549f577316abfe3bdd2459ef788dba Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 13 Dec 2023 11:03:59 +0800 Subject: [PATCH 3/5] refine --- .github/workflows/build-oneflow.yml | 36 +---------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 021b1dbcf..ebd8f36da 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -17,7 +17,7 @@ on: - "*.sh" concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-v2 cancel-in-progress: true env: @@ -33,7 +33,6 @@ jobs: strategy: fail-fast: false matrix: - tag-suffix: ["cuda11.2", "cuda10.2"] include: - tag-suffix: "cuda12.2" policy: "manylinux2014" @@ -43,43 +42,10 @@ jobs: policy: "manylinux2014" platform: "x86_64" CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" - - tag-suffix: "cuda12.0" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:12.0.1-cudnn8-devel-centos7" - tag-suffix: "cuda11.8" policy: "manylinux2014" platform: "x86_64" CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" - - tag-suffix: "cuda11.6" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.6.2-cudnn8-devel-centos7" - - tag-suffix: "cuda11.5" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.5.1-cudnn8-devel-centos7" - - tag-suffix: "cuda11.4" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.4.3-cudnn8-devel-centos7" - - tag-suffix: "cuda11.3" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.3.1-cudnn8-devel-centos7" - - tag-suffix: "cuda11.2" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.2.2-cudnn8-devel-centos7" - - tag-suffix: "cuda11.0" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.0.3-cudnn8-devel-centos7" - target: "manylinux" - - tag-suffix: "cuda10.2" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:10.2-cudnn7-devel-centos7" - tag-suffix: "cpu" policy: "manylinux2014" platform: "x86_64" From 6455f9b8154333333e6285fde3747aaac4a92929 Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 13 Dec 2023 11:05:24 +0800 Subject: [PATCH 4/5] fix --- .github/workflows/build-oneflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index ebd8f36da..a36b8a163 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -12,7 +12,7 @@ on: branches-ignore: - "update-dependencies-pr" paths: - - ".github/workflows/build.yml" + - ".github/workflows/**" - "docker/**" - "*.sh" From e11e421bc320370b0fd486ff9e90cd819c05fe8b Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 11:54:21 +0800 Subject: [PATCH 5/5] perl-IPC-Cmd --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0b6eee303..25bf6f6b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -173,6 +173,7 @@ RUN curl -L $BAZEL_URL -o /usr/local/bin/bazel \ && chmod +x /usr/local/bin/bazel RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static devtoolset-7-gcc* vim ccache htop fuse-devel RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel +RUN yum install -y perl-IPC-Cmd RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clangd && \ chmod +x /usr/local/bin/clangd