diff --git a/.circleci/config.yml b/.circleci/config.yml
index 877ed569f..247f9e8ad 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -6,9 +6,9 @@ version: 2.1
 # -------------------------------------------------------------------------------------
 gpu: &gpu
   environment:
-    CUDA_VERSION: "11.1"
+    CUDA_VERSION: "11.8"
   machine:
-    image: ubuntu-1604-cuda-11.1:202012-01
+    image: linux-cuda-11:2023.02.1
   resource_class: gpu.nvidia.medium.multi
@@ -21,12 +21,15 @@ install_dep_common: &install_dep_common
   - run:
       name: Install Common Dependencies
       command: |
+        source $BASH_ENV
         source activate metaseq
-        # Fixed version to work around https://github.com/pytorch/pytorch/pull/69904
-        pip install setuptools==59.5.0
-        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
         python -c 'import torch; print("Torch version:", torch.__version__)'
         python -m torch.utils.collect_env
+
+        pip install --upgrade pip
+        pip install --upgrade setuptools
+        pip install packaging
+
         # Need to install ninja build system
         sudo apt-get update
         sudo apt-get install ninja-build
@@ -37,18 +40,17 @@ install_dep_fused_ops: &install_dep_fused_ops
       working_directory: ~/
       # because of https://github.com/NVIDIA/apex/issues/1252 we need to pin to a specific apex commit
       command: |
+        source $BASH_ENV
         source activate metaseq
         if ! python -c 'import apex'; then
-          git clone https://github.com/NVIDIA/apex
+          git clone --recursive https://github.com/NVIDIA/apex.git
           cd apex
-          git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
           pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
           cd ~/
         fi
         if ! python -c 'import megatron_lm'; then
           git clone --depth=1 --branch fairseq_v3 https://github.com/ngoyal2707/Megatron-LM.git
           cd Megatron-LM
-          pip install -r requirements.txt
           pip install -e .
           cd ~/
         fi
@@ -59,6 +61,7 @@ install_fairscale: &install_fairscale
       name: Install Fairscale from Source
       working_directory: ~/
       command: |
+        source $BASH_ENV
         source activate metaseq
         if ! python -c 'import fairscale'; then
           git clone https://github.com/facebookresearch/fairscale.git
@@ -72,9 +75,10 @@ install_dep_pt19: &install_dep_pt19
   - run:
       name: Install Pytorch Dependencies
       command: |
+        source $BASH_ENV
         source activate metaseq
         pip install --upgrade setuptools
-        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
         python -c 'import torch; print("Torch version:", torch.__version__)'
@@ -86,6 +90,7 @@ install_pytorch_dep: &install_pytorch_dep
   - run:
       name: Install Pytorch Dependencies
       command: |
+        source $BASH_ENV
         source activate metaseq
         pip install --upgrade setuptools
         echo "<>"
@@ -96,6 +101,7 @@ install_repo: &install_repo
   - run:
       name: Install Repository
       command: |
+        source $BASH_ENV
         source activate metaseq
         pip install -e .[dev,few_shot,gpu]
         python setup.py build_ext --inplace
@@ -121,7 +127,7 @@ create_conda_env: &create_conda_env
         source $BASH_ENV
         if [ ! -d ~/miniconda/envs/metaseq ]
         then
-          conda create -y -n metaseq python=3.8
+          conda create -y -n metaseq python=3.9
         fi
         source activate metaseq
         python --version
@@ -132,11 +138,12 @@ download_and_configure_125m_with_hf_dependencies: &download_and_configure_125m_w
       name: Download and configure a 125m checkpoint with HF dependencies
       working_directory: ~/metaseq/gpu_tests
       command: |
+        source $BASH_ENV
         source activate metaseq
         wget https://dl.fbaipublicfiles.com/opt/test_artifacts/125m_with_hf_dependencies.tar.gz
         tar -xvzf ./125m_with_hf_dependencies.tar.gz -C .
         python -m metaseq.scripts.convert_to_singleton ./125m
-        python -m transformers.models.opt.convert_opt_original_pytorch_checkpoint_to_pytorch --pytorch_dump_folder_path ./125m/ --hf_config ./125m/config.json --fairseq_path ./125m/restored.pt
+        python -m transformers.models.opt.convert_opt_original_pytorch_checkpoint_to_pytorch --pytorch_dump_folder_path ./125m/ --hf_config ./125m/config.json --fairseq_path ./125m/restored.pt

 commands:
@@ -160,7 +167,7 @@ commands:
       - <<: *download_and_configure_125m_with_hf_dependencies
       - save_cache:
          paths:
-            - ~/miniconda/envs/metaseq/lib/python3.8/site-packages
+            - ~/miniconda/envs/metaseq/lib/python3.9/site-packages
          key: *cache_key
       - run:
          name: Run Unit Tests