Skip to content

Commit 622d512

Browse files
chore: install tf profiler in tf images (#234)
1 parent 21a216d commit 622d512

File tree

4 files changed

+20
-1
lines changed

4 files changed

+20
-1
lines changed

Dockerfile-default-cpu

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,9 @@ RUN if [ "$TORCHVISION_PIP" ]; then pip install $TORCHVISION_PIP; fi
3333
ARG TORCH_TB_PROFILER_PIP
3434
RUN if [ "$TORCH_TB_PROFILER_PIP" ]; then pip install $TORCH_TB_PROFILER_PIP; fi
3535

36+
ARG TF_PROFILER_PIP
37+
RUN if [ "$TF_PROFILER_PIP" ]; then python -m pip install $TF_PROFILER_PIP; fi
38+
3639
ARG HOROVOD_WITH_TENSORFLOW
3740
RUN if [ "$HOROVOD_WITH_TENSORFLOW" ]; then export HOROVOD_WITH_TENSORFLOW=$HOROVOD_WITH_TENSORFLOW; fi
3841

Dockerfile-default-gpu

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ RUN if [ "$TF_CUDA_SYM" ]; then ln -s /usr/local/cuda/lib64/libcusolver.so.11 /o
4141
ARG TORCH_TB_PROFILER_PIP
4242
RUN if [ "$TORCH_TB_PROFILER_PIP" ]; then python -m pip install $TORCH_TB_PROFILER_PIP; fi
4343

44+
ARG TF_PROFILER_PIP
45+
RUN if [ "$TF_PROFILER_PIP" ]; then python -m pip install $TF_PROFILER_PIP; fi
46+
4447
ARG TORCH_CUDA_ARCH_LIST
4548
ARG APEX_GIT
4649
RUN /tmp/det_dockerfile_scripts/install_apex.sh

Dockerfile-default-rocm

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ RUN pip install protobuf==3.20.1
5353
ARG TENSORFLOW_PIP
5454
RUN if [ "$TENSORFLOW_PIP" ]; then pip install $TENSORFLOW_PIP; fi
5555

56+
ARG TORCH_TB_PROFILER_PIP
57+
RUN if [ "$TORCH_TB_PROFILER_PIP" ]; then pip install $TORCH_TB_PROFILER_PIP; fi
58+
59+
ARG TF_PROFILER_PIP
60+
RUN if [ "$TF_PROFILER_PIP" ]; then python -m pip install $TF_PROFILER_PIP; fi
61+
5662
# Reset these because we set GPU_OPERATIONS later.
5763
ENV HOROVOD_GPU_BROADCAST=
5864
ENV HOROVOD_GPU_ALLREDUCE=

Makefile

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,12 +179,16 @@ build-gpu-cuda-118-base:
179179
.
180180

181181
export ROCM50_TORCH_TF_ENVIRONMENT_NAME := $(ROCM_50_PREFIX)pytorch-1.10-tf-2.7-rocm
182+
export TF_PROFILER_PIP := tensorboard-plugin-profile
183+
export TORCH_TB_PROFILER_PIP := torch-tb-profiler==0.4.1
182184

183185
.PHONY: build-pytorch10-tf27-rocm50
184186
build-pytorch10-tf27-rocm50:
185187
docker build -f Dockerfile-default-rocm \
186188
--build-arg BASE_IMAGE="amdih/pytorch:rocm5.0_ubuntu18.04_py3.7_pytorch_1.10.0" \
189+
--build-arg TORCH_TB_PROFILER_PIP="$(TORCH_TB_PROFILER_PIP)" \
187190
--build-arg TENSORFLOW_PIP="tensorflow-rocm==2.7.1" \
191+
--build-arg TF_PROFILER_PIP="$(TF_PROFILER_PIP)" \
188192
--build-arg HOROVOD_PIP="horovod==0.25.0" \
189193
-t $(DOCKERHUB_REGISTRY)/$(ROCM50_TORCH_TF_ENVIRONMENT_NAME)-$(SHORT_GIT_HASH) \
190194
-t $(DOCKERHUB_REGISTRY)/$(ROCM50_TORCH_TF_ENVIRONMENT_NAME)-$(VERSION) \
@@ -194,7 +198,6 @@ DEEPSPEED_VERSION := 0.8.3
194198
export GPU_DEEPSPEED_ENVIRONMENT_NAME := $(CUDA_113_PREFIX)pytorch-1.10-deepspeed-$(DEEPSPEED_VERSION)$(GPU_SUFFIX)
195199
export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME := $(CUDA_113_PREFIX)pytorch-1.10-gpt-neox-deepspeed$(GPU_SUFFIX)
196200
export TORCH_PIP_DEEPSPEED_GPU := torch==1.10.2+cu113 torchvision==0.11.3+cu113 torchaudio==0.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
197-
export TORCH_TB_PROFILER_PIP := torch-tb-profiler==0.4.1
198201

199202
# This builds deepspeed environment off of upstream microsoft/DeepSpeed.
200203
.PHONY: build-deepspeed-gpu
@@ -254,6 +257,7 @@ build-tf28-cpu: build-cpu-py-38-base
254257
--platform "$(PLATFORMS)" \
255258
--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(CPU_PY_38_BASE_NAME)-$(SHORT_GIT_HASH)" \
256259
--build-arg TENSORFLOW_PIP="tensorflow-cpu==2.8.4" \
260+
--build-arg TF_PROFILER_PIP="$(TF_PROFILER_PIP)" \
257261
--build-arg HOROVOD_PIP="horovod==0.24.2" \
258262
--build-arg HOROVOD_WITH_PYTORCH=0 \
259263
--build-arg HOROVOD_WITH_MPI="$(HOROVOD_WITH_MPI)" \
@@ -268,6 +272,7 @@ build-tf28-gpu: build-gpu-cuda-112-base
268272
docker build -f Dockerfile-default-gpu \
269273
--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(GPU_CUDA_112_BASE_NAME)-$(SHORT_GIT_HASH)" \
270274
--build-arg TENSORFLOW_PIP="tensorflow==2.8.3" \
275+
--build-arg TF_PROFILER_PIP="$(TF_PROFILER_PIP)" \
271276
--build-arg HOROVOD_PIP="horovod==0.24.2" \
272277
--build-arg HOROVOD_WITH_PYTORCH=0 \
273278
-t $(DOCKERHUB_REGISTRY)/$(GPU_TF28_ENVIRONMENT_NAME)-$(SHORT_GIT_HASH) \
@@ -320,6 +325,7 @@ build-tf2-cpu: build-cpu-py-39-base
320325
--platform "$(PLATFORMS)" \
321326
--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(CPU_PY_39_BASE_NAME)-$(SHORT_GIT_HASH)" \
322327
--build-arg TENSORFLOW_PIP="$(TF2_PIP_CPU)" \
328+
--build-arg TF_PROFILER_PIP="$(TF_PROFILER_PIP)" \
323329
--build-arg TORCH_PIP="$(TORCH_PIP_CPU)" \
324330
--build-arg TORCH_TB_PROFILER_PIP="$(TORCH_TB_PROFILER_PIP)" \
325331
--build-arg HOROVOD_PIP="$(HOROVOD_PIP_COMMAND)" \
@@ -351,6 +357,7 @@ build-tf2-gpu: build-gpu-cuda-113-base
351357
--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(GPU_CUDA_113_BASE_NAME)-$(SHORT_GIT_HASH)" \
352358
--build-arg TENSORFLOW_PIP="$(TF2_PIP_GPU)" \
353359
--build-arg TORCH_PIP="$(TORCH_PIP_GPU)" \
360+
--build-arg TF_PROFILER_PIP="$(TF_PROFILER_PIP)" \
354361
--build-arg TORCH_TB_PROFILER_PIP="$(TORCH_TB_PROFILER_PIP)" \
355362
--build-arg TORCH_CUDA_ARCH_LIST="3.7;6.0;6.1;6.2;7.0;7.5;8.0" \
356363
--build-arg APEX_GIT="https://github.com/determined-ai/apex.git@3caf0f40c92e92b40051d3afff8568a24b8be28d" \

0 commit comments

Comments
 (0)