Skip to content

Commit 0d34ca6

Browse files
[feat] upgrade torchrec to 1.0.0 (#19)
1 parent 55d2874 commit 0d34ca6

File tree

14 files changed, with 74 additions and 35 deletions.

.github/workflows/codestyle_ci.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-codestyle-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
steps:
1414
- name: FetchCommit ${{ github.event.pull_request.head.sha }}
1515
uses: actions/checkout@v2

.github/workflows/pytyping_ci.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-codestyle-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
steps:
1414
- name: FetchCommit ${{ github.event.pull_request.head.sha }}
1515
uses: actions/checkout@v2

.github/workflows/unittest_ci.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
options: --gpus all --ipc host
1414
steps:
1515
- name: FetchCommit ${{ github.event.pull_request.head.sha }}

.pre-commit-config.yaml

+3-3
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,13 @@ repos:
66
files: \.py$
77
args: ["--license-filepath", "data/.license_header.txt", "--use-current-year"]
88
- repo: https://github.com/astral-sh/ruff-pre-commit
9-
rev: v0.4.10
9+
rev: v0.7.1
1010
hooks:
1111
- id: ruff
1212
args: [ --fix ]
1313
- id: ruff-format
1414
- repo: https://github.com/pre-commit/pre-commit-hooks
15-
rev: v4.6.0
15+
rev: v5.0.0
1616
hooks:
1717
- id: trailing-whitespace
1818
- id: check-yaml
@@ -29,7 +29,7 @@ repos:
2929
- id: codespell
3030
args: ["--skip", "*.json"]
3131
- repo: https://github.com/executablebooks/mdformat
32-
rev: 0.7.17
32+
rev: 0.7.18
3333
hooks:
3434
- id: mdformat
3535
additional_dependencies:

docker/Dockerfile

+30-8
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,44 @@
1-
FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
1+
FROM ubuntu:22.04
22

33
RUN sed -i "s@http://archive.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
44
sed -i "s@http://security.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
55
sed -i "s@http://ports.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
66
apt-get update && \
77
apt-get upgrade -y && \
8-
apt-get install -y git vim watchman wget
8+
apt-get install -y --no-install-recommends \
9+
build-essential ca-certificates \
10+
ccache cmake gcc git vim watchman wget curl && \
11+
rm -rf /var/lib/apt/lists/*
912

1013
RUN wget https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/libidn11_1.33-2.2ubuntu2_amd64.deb && \
11-
apt-get install ./libidn11_1.33-2.2ubuntu2_amd64.deb
14+
apt-get install ./libidn11_1.33-2.2ubuntu2_amd64.deb && rm libidn11_1.33-2.2ubuntu2_amd64.deb
1215

1316
ADD pip.conf /root/.config/pip/pip.conf
17+
RUN curl -fsSL -v -o ~/miniconda.sh -O "https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/Miniforge3-Linux-x86_64.sh" && \
18+
chmod +x ~/miniconda.sh && \
19+
bash ~/miniconda.sh -b -p /opt/conda && \
20+
rm ~/miniconda.sh && \
21+
/opt/conda/bin/conda update -y -n base -c defaults conda && \
22+
/opt/conda/bin/conda install -y python=3.11 && \
23+
/opt/conda/bin/conda clean -ya
24+
ENV PATH /opt/conda/bin:$PATH
1425

15-
RUN pip install fbgemm-gpu==0.8.0 --index-url https://download.pytorch.org/whl/cu121 && \
16-
pip install torchmetrics==1.0.3 && \
17-
pip install torchrec==0.8.0 --index-url https://download.pytorch.org/whl/cu121 && \
18-
pip install torch_tensorrt==2.4.0
26+
ARG DEVICE
27+
RUN case ${DEVICE} in \
28+
"cu121") pip install torch==2.5.0 fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cu121 && \
29+
pip install torchmetrics==1.0.3 torch_tensorrt==2.5.0 && \
30+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cu121 ;; \
31+
* ) pip install torch==2.5.0 fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cpu && \
32+
pip install torchmetrics==1.0.3 && \
33+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cpu ;; \
34+
esac && \
35+
/opt/conda/bin/conda clean -ya
36+
37+
ENV NVIDIA_VISIBLE_DEVICES all
38+
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
39+
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
40+
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
1941

2042
ADD requirements.txt /root/requirements.txt
2143
ADD requirements /root/requirements
22-
RUN cd /root && pip install -r requirements.txt
44+
RUN cd /root && pip install -r requirements.txt && rm requirements.txt

docs/source/quick_start/local_tutorial.md

+12-3
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@ pip index versions tzrec -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nig
1515
```bash
1616
conda create -n tzrec python=3.11
1717
conda activate tzrec
18-
pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu121
19-
pip install fbgemm-gpu==0.8.0 --index-url https://download.pytorch.org/whl/cu121
18+
pip install torch==2.5.0 --index-url https://download.pytorch.org/whl/cu121
19+
pip install fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cu121
2020
pip install torchmetrics==1.0.3
21-
pip install torchrec==0.8.0 --index-url https://download.pytorch.org/whl/cu121
21+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cu121
2222
pip install tzrec==${TZREC_NIGHTLY_VERSION} -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nightly/repo.html --trusted-host tzrec.oss-cn-beijing.aliyuncs.com
2323
```
2424

@@ -30,6 +30,15 @@ docker exec -it <CONTAINER_ID> bash
3030
pip install tzrec==${TZREC_NIGHTLY_VERSION} -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nightly/repo.html --trusted-host tzrec.oss-cn-beijing.aliyuncs.com
3131
```
3232

33+
注:
34+
35+
```
36+
GPU版本(CUDA 12.1) 镜像地址:
37+
mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:${TZREC_DOCKER_VERSION}-cu121
38+
CPU版本 镜像地址:
39+
mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:${TZREC_DOCKER_VERSION}-cpu
40+
```
41+
3342
## 前置准备
3443

3544
### 数据

requirements/runtime.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ alibabacloud_credentials
22
anytree
33
common_io @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl
44
faiss-cpu
5-
fbgemm-gpu==0.8.0
5+
fbgemm-gpu==1.0.0
66
graphlearn @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/graphlearn-1.3.0-cp311-cp311-linux_x86_64.whl ; python_version=="3.11"
77
graphlearn @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/graphlearn-1.3.0-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"
88
grpcio-tools<1.63.0
@@ -11,6 +11,6 @@ pyfg @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/pyfg-0.3.2-cp311-cp
1111
pyfg @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/pyfg-0.3.2-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"
1212
scikit-learn
1313
tensorboard
14-
torch==2.4.0
14+
torch==2.5.0
1515
torchmetrics==1.0.3
16-
torchrec==0.8.0
16+
torchrec==1.0.0

scripts/build_docker.sh

+10-4
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,20 @@
11
#!/usr/bin/env bash
22

33
REGISTRY=mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec
4-
DOCKER_TAG=0.5
4+
DOCKER_TAG=0.6
55

66
cp requirements.txt docker/
77
rm -rf docker/requirements
88
cp -r requirements/ docker/requirements
99
cd docker
1010

11-
docker build -t ${REGISTRY}/tzrec-devel:latest .
12-
docker images -q ${REGISTRY}/tzrec-devel:latest | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
13-
docker push ${REGISTRY}/tzrec-devel:latest
11+
for DEVICE in cu121 cpu
12+
do
13+
docker build --network host -t ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-${DEVICE} --build-arg DEVICE=${DEVICE} .
14+
docker push ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-${DEVICE}
15+
done
16+
17+
docker images -q ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-cu121 | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
18+
docker images -q ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-cu121 | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:latest
1419
docker push ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
20+
docker push ${REGISTRY}/tzrec-devel:latest

scripts/ci_test.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@
33
pip install -r requirements.txt
44
bash scripts/gen_proto.sh
55

6-
MKL_THREADING_LAYER=GNU PYTHONPATH=. python tzrec/tests/run.py
6+
MKL_THREADING_LAYER=GNU TORCH_DEVICE_BACKEND_AUTOLOAD=0 PYTHONPATH=. python tzrec/tests/run.py

tzrec/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111

1212
import os as _os
1313

14+
import torch as _torch # NOQA
15+
1416
if "OMP_NUM_THREADS" not in _os.environ:
1517
_os.environ["OMP_NUM_THREADS"] = "1"
1618

tzrec/datasets/utils.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ def to(self, device: torch.device, non_blocking: bool = False) -> "Batch":
133133
batch_size=self.batch_size,
134134
)
135135

136-
def record_stream(self, stream: torch.cuda.streams.Stream) -> None:
136+
def record_stream(self, stream: torch.Stream) -> None:
137137
"""Record which streams have used the tensor."""
138138
for v in self.dense_features.values():
139139
v.record_stream(stream)

tzrec/tests/utils.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -582,7 +582,7 @@ def build_mock_input_with_fg(
582582
inputs = defaultdict(dict)
583583
single_id_fields = {user_id, item_id}
584584
for feature in features:
585-
if type(feature) == IdFeature:
585+
if type(feature) is IdFeature:
586586
is_multi = (
587587
random.random() < 0.5 and feature.inputs[0] not in single_id_fields
588588
)
@@ -594,22 +594,22 @@ def build_mock_input_with_fg(
594594
vocab_list=feature.config.vocab_list,
595595
multival_sep=chr(29),
596596
)
597-
elif type(feature) == RawFeature:
597+
elif type(feature) is RawFeature:
598598
side, name = feature.side_inputs[0]
599599
inputs[side][name] = RawMockInput(
600600
name,
601601
value_dim=feature.config.value_dim,
602602
multival_sep=chr(29),
603603
)
604-
elif type(feature) == ComboFeature:
604+
elif type(feature) is ComboFeature:
605605
for side, input_name in feature.side_inputs:
606606
if input_name in inputs[side]:
607607
continue
608608
is_multi = random.random() < 0.5 and input_name not in single_id_fields
609609
inputs[side][input_name] = IdMockInput(
610610
input_name, is_multi=is_multi, multival_sep=chr(29)
611611
)
612-
elif type(feature) == LookupFeature:
612+
elif type(feature) is LookupFeature:
613613
for i, (side, input_name) in enumerate(feature.side_inputs):
614614
if input_name in inputs[side]:
615615
continue
@@ -627,7 +627,7 @@ def build_mock_input_with_fg(
627627
inputs[side][input_name] = IdMockInput(
628628
input_name, is_multi=is_multi, multival_sep=chr(29)
629629
)
630-
elif type(feature) == MatchFeature:
630+
elif type(feature) is MatchFeature:
631631
for i, (side, input_name) in enumerate(feature.side_inputs):
632632
if input_name in inputs[side]:
633633
continue
@@ -637,14 +637,14 @@ def build_mock_input_with_fg(
637637
inputs[side][input_name] = IdMockInput(
638638
input_name, multival_sep=chr(29)
639639
)
640-
elif type(feature) == ExprFeature:
640+
elif type(feature) is ExprFeature:
641641
for side, input_name in feature.side_inputs:
642642
if input_name in inputs[side]:
643643
continue
644644
inputs[side][input_name] = RawMockInput(
645645
input_name, multival_sep=chr(29)
646646
)
647-
elif type(feature) == TokenizeFeature:
647+
elif type(feature) is TokenizeFeature:
648648
side, name = feature.side_inputs[0]
649649
inputs[side][name] = IdMockInput(
650650
name,

tzrec/utils/config_util.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ def edit_config(pipeline_config: Message, edit_config_json: Dict[str, Any]) -> M
127127

128128
# pyre-ignore [2, 3]
129129
def _type_convert(proto, val, parent=None):
130-
if type(val) != type(proto):
130+
if type(val) is not type(proto):
131131
try:
132132
if isinstance(proto, bool):
133133
assert val in ["True", "true", "False", "false"]

tzrec/version.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,4 @@
99
# See the License for the specific language governing permissions and
1010
# limitations under the License.
1111

12-
__version__ = "0.5.8"
12+
__version__ = "0.6.0"

0 commit comments

Comments
 (0)