
Commit cbf4f41

Authored by mc-nv, BenjaminBraunDev, nv-kmcgill53, ziqif-nv, and yinggeh
TPRD-1200: Update default branch post 25.03 (#8130)
Co-authored-by: BenjaminBraunDev <[email protected]>
Co-authored-by: Kyle McGill <[email protected]>
Co-authored-by: Ziqi Fan <[email protected]>
Co-authored-by: Yingge He <[email protected]>
Co-authored-by: Kris Hung <[email protected]>
Co-authored-by: richardhuo-nv <[email protected]>
Co-authored-by: Tanmay Verma <[email protected]>
Co-authored-by: Olga Andreeva <[email protected]>
Co-authored-by: Indrajit Bhosale <[email protected]>
1 parent 42811e0 commit cbf4f41

291 files changed, +1526 -9712 lines


Dockerfile.QA

+19 -12

@@ -61,6 +61,7 @@ RUN apt-get update && \
        python3-pip \
        python3-wheel \
        python3-setuptools \
+        python3-venv \
        rapidjson-dev \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*
@@ -74,12 +75,19 @@ RUN apt update -q=2 \
    && apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3*

# Add inception_graphdef model to example repo
+# FIXME: This should be changed to using the fetch_models.sh script
+# in order to ensure the public facing docs are up-to-date.
WORKDIR /workspace/docs/examples/model_repository
-RUN mkdir -p inception_graphdef/1 && \
-    wget -O ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb.tar.gz \
-        https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz && \
-    (cd ${TRITONTMP_DIR} && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz) && \
-    mv ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb inception_graphdef/1/model.graphdef
+RUN mkdir -p model_repository/inception_onnx/1 && \
+    wget -O /tmp/inception_v3_2016_08_28_frozen.pb.tar.gz \
+        https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz && \
+    (cd /tmp && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz) && \
+    python3 -m venv tf2onnx && \
+    source ./tf2onnx/bin/activate && \
+    pip3 install "numpy<2" tensorflow tf2onnx && \
+    python3 -m tf2onnx.convert --graphdef /tmp/inception_v3_2016_08_28_frozen.pb --output inception_v3_onnx.model.onnx --inputs input:0 --outputs InceptionV3/Predictions/Softmax:0 && \
+    deactivate && \
+    mv inception_v3_onnx.model.onnx model_repository/inception_onnx/1/model.onnx

# Update the qa/ directory with test executables, models, etc.
WORKDIR /workspace
@@ -109,7 +117,7 @@ RUN mkdir -p qa/common && \
    cp -r docs/examples/model_repository/simple_identity qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_sequence qa/L0_grpc/models && \
    cp -r docs/examples/model_repository/simple_string qa/L0_grpc/models && \
-    cp -r docs/examples/model_repository/inception_graphdef qa/L0_grpc/models && \
+    cp -r docs/examples/model_repository/inception_onnx qa/L0_grpc/models && \
    mkdir qa/L0_grpc_state_cleanup/models && \
    cp -r /workspace/src/test/models/repeat_int32 qa/L0_grpc_state_cleanup/models/ && \
    mkdir qa/L0_http/models && \
@@ -118,7 +126,7 @@ RUN mkdir -p qa/common && \
    cp -r docs/examples/model_repository/simple_identity qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_sequence qa/L0_http/models && \
    cp -r docs/examples/model_repository/simple_string qa/L0_http/models && \
-    cp -r docs/examples/model_repository/inception_graphdef qa/L0_http/models && \
+    cp -r docs/examples/model_repository/inception_onnx qa/L0_grpc/models && \
    mkdir qa/L0_https/models && \
    cp -r docs/examples/model_repository/simple qa/L0_https/models/. && \
    mkdir qa/L0_secure_grpc/models && \
@@ -149,21 +157,20 @@ RUN mkdir -p qa/common && \
    cp bin/triton_json_test qa/L0_json/. && \
    cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
    cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
-    cp bin/input_byte_size_test qa/L0_input_validation/. && \
-    cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+    cp bin/input_byte_size_test qa/L0_input_validation/.

RUN mkdir -p qa/pkgs && \
    cp python/triton*.whl qa/pkgs/. && \
    cp -rf python/test/. qa/L0_python_api/.

RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_simple_ensemble/models/simple/1/. && \
    mkdir -p qa/L0_simple_ensemble/models/simple/2 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_simple_ensemble/models/simple/2/. && \
    mkdir -p qa/L0_socket/models/simple/1 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_socket/models/simple/1/.

RUN mkdir -p qa/L0_backend_identity/models && \
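
For reference, a quick way to confirm what the converted model exposes is to open it with onnxruntime and list its tensors. This is only an illustrative sketch, not part of the commit; it assumes onnxruntime is installed and that tf2onnx kept the TensorFlow tensor names passed to the convert command above.

```
# Illustrative check of the converted model (assumes `pip install onnxruntime`).
import onnxruntime as ort

sess = ort.InferenceSession("model_repository/inception_onnx/1/model.onnx")
# tf2onnx normally preserves the TF names, so these should echo the --inputs/--outputs
# values used above (e.g. "input:0" and "InceptionV3/Predictions/Softmax:0");
# verify rather than assume.
print([(i.name, i.shape, i.type) for i in sess.get_inputs()])
print([(o.name, o.shape, o.type) for o in sess.get_outputs()])
```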

Dockerfile.sdk

+1 -1

@@ -29,7 +29,7 @@
#

# Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.02-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.03-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo

TRITON_VERSION

+1 -1

@@ -1 +1 @@
-2.56.0dev
+2.57.0dev

build.py

+14 -46

@@ -71,14 +71,14 @@
#

DEFAULT_TRITON_VERSION_MAP = {
-    "release_version": "2.56.0dev",
-    "triton_container_version": "25.03dev",
-    "upstream_container_version": "25.02",
-    "ort_version": "1.20.1",
+    "release_version": "2.57.0dev",
+    "triton_container_version": "25.04dev",
+    "upstream_container_version": "25.03",
+    "ort_version": "1.21.0",
    "ort_openvino_version": "2025.0.0",
    "standalone_openvino_version": "2025.0.0",
    "dcgm_version": "3.3.6",
-    "vllm_version": "0.7.0",
+    "vllm_version": "0.7.3",
    "rhel_py_version": "3.12.3",
}

@@ -562,8 +562,6 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
        args = onnxruntime_cmake_args(images, library_paths)
    elif be == "openvino":
        args = openvino_cmake_args()
-    elif be == "tensorflow":
-        args = tensorflow_cmake_args(images, library_paths)
    elif be == "python":
        args = python_cmake_args()
    elif be == "dali":
@@ -795,23 +793,6 @@ def tensorrt_cmake_args():
    return cargs


-def tensorflow_cmake_args(images, library_paths):
-    backend_name = "tensorflow"
-    extra_args = []
-
-    # If a specific TF image is specified use it, otherwise pull from NGC.
-    if backend_name in images:
-        image = images[backend_name]
-    else:
-        image = "nvcr.io/nvidia/tensorflow:{}-tf2-py3".format(
-            FLAGS.upstream_container_version
-        )
-    extra_args = [
-        cmake_backend_arg(backend_name, "TRITON_TENSORFLOW_DOCKER_IMAGE", None, image)
-    ]
-    return extra_args
-
-
def dali_cmake_args():
    return [
        cmake_backend_enable("dali", "TRITON_DALI_SKIP_DOWNLOAD", False),
@@ -1233,10 +1214,10 @@ def create_dockerfile_linux(
        argmap["BASE_IMAGE"],
    )

-    # PyTorch and TensorFlow backends need extra CUDA and other
+    # PyTorch backends need extra CUDA and other
    # dependencies during runtime that are missing in the CPU-only base container.
    # These dependencies must be copied from the Triton Min image.
-    if not FLAGS.enable_gpu and (("pytorch" in backends) or ("tensorflow" in backends)):
+    if not FLAGS.enable_gpu and ("pytorch" in backends):
        df += """
############################################################################
## Triton Min image
@@ -1602,10 +1583,10 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
        cuda_arch=cuda_arch, libs_arch=libs_arch
    )

-    if ("pytorch" in backends) or ("tensorflow" in backends):
-        # Add NCCL dependency for tensorflow/pytorch backend.
+    if "pytorch" in backends:
+        # Add NCCL dependency for pytorch backend.
        # Note: Even though the build is CPU-only, the version of
-        # tensorflow/pytorch we are using depends upon the NCCL library.
+        # pytorch we are using depends upon the NCCL library.
        # Since this dependency is not present in the ubuntu base image,
        # we must copy it from the Triton min container ourselves.
        df += """
@@ -1720,11 +1701,10 @@ def create_build_dockerfiles(
    }

    # For CPU-only image we need to copy some cuda libraries and dependencies
-    # since we are using PyTorch and TensorFlow containers that
-    # are not CPU-only.
+    # since we are using PyTorch containers that are not CPU-only.
    if (
        not FLAGS.enable_gpu
-        and (("pytorch" in backends) or ("tensorflow" in backends))
+        and ("pytorch" in backends)
        and (target_platform() != "windows")
    ):
        if "gpu-base" in images:
@@ -2351,7 +2331,6 @@ def enable_all():
        "identity",
        "square",
        "repeat",
-        "tensorflow",
        "onnxruntime",
        "python",
        "dali",
@@ -2586,7 +2565,7 @@ def enable_all():
        "--image",
        action="append",
        required=False,
-        help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", "tensorflow", or "pytorch".',
+        help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", or "pytorch".',
    )

    parser.add_argument(
@@ -2887,12 +2866,6 @@ def enable_all():
        parts = be.split(":")
        if len(parts) == 1:
            parts.append(default_repo_tag)
-        if parts[0] == "tensorflow1":
-            fail(
-                "Starting from Triton version 23.04, support for TensorFlow 1 has been discontinued. Please switch to Tensorflow 2."
-            )
-        if parts[0] == "tensorflow2":
-            parts[0] = "tensorflow"
        log('backend "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
        backends[parts[0]] = parts[1]

@@ -2939,13 +2912,10 @@ def enable_all():
            len(parts) != 2, "--image must specify <image-name>,<full-image-registry>"
        )
        fail_if(
-            parts[0]
-            not in ["base", "gpu-base", "pytorch", "tensorflow", "tensorflow2"],
+            parts[0] not in ["base", "gpu-base", "pytorch"],
            "unsupported value for --image",
        )
        log('image "{}": "{}"'.format(parts[0], parts[1]))
-        if parts[0] == "tensorflow2":
-            parts[0] = "tensorflow"
        images[parts[0]] = parts[1]

    # Initialize map of library paths for each backend.
@@ -2954,8 +2924,6 @@ def enable_all():
        parts = lpath.split(":")
        if len(parts) == 2:
            log('backend "{}" library path "{}"'.format(parts[0], parts[1]))
-            if parts[0] == "tensorflow2":
-                parts[0] = "tensorflow"
            library_paths[parts[0]] = parts[1]

    # Parse any explicitly specified cmake arguments
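
With the TensorFlow aliases gone, the backend-spec handling in the last hunks reduces to a plain split. The sketch below is standalone and only illustrates that behaviour; the function name and the "main" default tag are placeholders, not build.py code.

```
# Minimal sketch of the backend-spec parsing left after this commit: no tensorflow1
# error and no tensorflow2 -> tensorflow remapping, just "<name>[:<tag>]" splitting.
def parse_backend_spec(spec, default_repo_tag="main"):
    parts = spec.split(":")
    if len(parts) == 1:
        parts.append(default_repo_tag)
    return parts[0], parts[1]

assert parse_backend_spec("onnxruntime") == ("onnxruntime", "main")
assert parse_backend_spec("python:r25.03") == ("python", "r25.03")
# "tensorflow2" is no longer recognized or remapped; it would now be treated
# as a literal (unknown) backend name.
```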

compose.py

+7 -15

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -71,14 +71,10 @@ def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
        argmap["TRITON_VERSION"], argmap["TRITON_CONTAINER_VERSION"], images["full"]
    )

-    # PyTorch, TensorFlow backends need extra CUDA and other
+    # PyTorch backends need extra CUDA and other
    # dependencies during runtime that are missing in the CPU-only base container.
    # These dependencies must be copied from the Triton Min image.
-    if not FLAGS.enable_gpu and (
-        ("pytorch" in backends)
-        or ("tensorflow" in backends)
-        or ("tensorflow2" in backends)
-    ):
+    if not FLAGS.enable_gpu and "pytorch" in backends:
        df += """
FROM {} AS min_container

@@ -302,7 +298,7 @@ def create_argmap(images, skip_pull):
    dcgm_ver = re.search("DCGM_VERSION=([\S]{4,}) ", vars)
    dcgm_version = ""
    if dcgm_ver is None:
-        dcgm_version = "2.2.3"
+        dcgm_version = "3.3.6"
        log(
            "WARNING: DCGM version not found from image, installing the earlierst version {}".format(
                dcgm_version
@@ -406,7 +402,7 @@ def create_argmap(images, skip_pull):
        '<image-name>,<full-image-name>. <image-name> can be "min", "gpu-min" '
        'or "full". Both "min" and "full" need to be specified at the same time.'
        'This will override "--container-version". "gpu-min" is needed for '
-        "CPU-only container to copy TensorFlow and PyTorch deps.",
+        "CPU-only container to copy PyTorch deps.",
    )
    parser.add_argument(
        "--enable-gpu",
@@ -504,13 +500,9 @@ def create_argmap(images, skip_pull):
    fail_if(len(images) < 2, "Need to specify both 'full' and 'min' images if at all")

    # For CPU-only image we need to copy some cuda libraries and dependencies
-    # since we are using PyTorch, TensorFlow 1, TensorFlow 2 containers that
+    # since we are using PyTorch containers that
    # are not CPU-only.
-    if (
-        ("pytorch" in FLAGS.backend)
-        or ("tensorflow" in FLAGS.backend)
-        or ("tensorflow2" in FLAGS.backend)
-    ) and ("gpu-min" not in images):
+    if ("pytorch" in FLAGS.backend) and ("gpu-min" not in images):
        images["gpu-min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
            FLAGS.container_version
        )
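
The DCGM hunk only changes the fallback used when the version cannot be parsed from the full image. Below is a standalone sketch of that lookup; `env_text` is a made-up stand-in for the image metadata that compose.py actually inspects.

```
# Standalone sketch of the DCGM-version fallback after this commit.
import re

env_text = "... DCGM_VERSION=3.3.6 ..."  # stand-in for the inspected image variables
match = re.search(r"DCGM_VERSION=([\S]{4,}) ", env_text)
dcgm_version = match.group(1) if match else "3.3.6"  # fallback bumped from 2.2.3 to 3.3.6
print(dcgm_version)  # -> 3.3.6
```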

deploy/alibaba-cloud/README.md

+5 -5

@@ -1,5 +1,5 @@
<!--
-# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -39,7 +39,7 @@ This repository contains information about how to deploy NVIDIA Triton Inference
- EAS provides a simple way for deep learning developers to deploy their models in Alibaba Cloud.
- Using **Triton Processor** is the recommended way on EAS to deploy Triton Inference Server. Users can simply deploy a Triton Server by preparing models and creating a EAS service by setting processor type to `triton`.
- Models should be uploaded to Alibaba Cloud's OSS(Object Storage Service). User's model repository in OSS will be mounted onto local path visible to Triton Server.
-- This documentation uses Triton's own example models for demo. The tensorflow inception model can be downloaded by the `fetch_models.sh` script.
+- This documentation uses Triton's own example models for demo. The ONNX inception v3 model can be obtained by the `fetch_models.sh` script.

# Prerequisites
- You should register an Alibaba Cloud Account, and being able to use EAS by [eascmd](https://help.aliyun.com/document_detail/111031.html?spm=a2c4g.11186623.6.752.42356f46FN5fU1), which is a command line tool to create stop or scale services on EAS.
@@ -48,10 +48,10 @@ This repository contains information about how to deploy NVIDIA Triton Inference

# Demo Instruction
## Prepare a model repo directory in OSS
-Download the tensorflow inception model via [fetch_model.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then using [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI) , which is a command line tool to use OSS, to upload the model to a certain OSS dir as you want.
+Download the ONNX inception v3 model via [fetch_model.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then using [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI) , which is a command line tool to use OSS, to upload the model to a certain OSS dir as you want.

```
-./ossutil cp inception_graphdef/ oss://triton-model-repo/models
+./ossutil cp inception_v3_onnx/ oss://triton-model-repo/models
```
## Create Triton Service with json config by eascmd
The following is the json we use when creating a Triton Server on EAS.
@@ -125,7 +125,7 @@ triton_client = httpclient.InferenceServerClient(url=URL, verbose=False)
start = time.time()
for i in range(10):
    results = triton_client.infer(
-        "inception_graphdef", inputs=[input_img], outputs=[output], headers=HEADERS
+        "inception_v3_onnx", inputs=[input_img], outputs=[output], headers=HEADERS
    )
    res_body = results.get_response()
    elapsed_ms = (time.time() - start) * 1000
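
With the example model renamed from `inception_graphdef` to `inception_v3_onnx`, a complete client call looks roughly like the sketch below. The URL, tensor names, and 1x299x299x3 FP32 shape are assumptions carried over from the tf2onnx conversion in Dockerfile.QA, not values confirmed by this commit; check the deployed model's configuration before relying on them.

```
# Hedged sketch of the updated client call; tensor names and shape are assumed.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

img = np.random.rand(1, 299, 299, 3).astype(np.float32)  # stand-in for a preprocessed image
inp = httpclient.InferInput("input:0", list(img.shape), "FP32")
inp.set_data_from_numpy(img)
out = httpclient.InferRequestedOutput("InceptionV3/Predictions/Softmax:0")

result = client.infer("inception_v3_onnx", inputs=[inp], outputs=[out])
print(result.as_numpy("InceptionV3/Predictions/Softmax:0").shape)
```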

deploy/aws/README.md

+2 -2

@@ -1,5 +1,5 @@
<!--
-# Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -224,7 +224,7 @@ using image classification models being served by the inference
server. For example,

```
-$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
Request 0, batch size 1
Image 'images/mug.jpg':
    504 (COFFEE MUG) = 0.723992

deploy/aws/values.yaml

+1 -1

@@ -27,7 +27,7 @@
replicaCount: 1

image:
-  imageName: nvcr.io/nvidia/tritonserver:25.02-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
  pullPolicy: IfNotPresent
  modelRepositoryPath: s3://triton-inference-server-repository/model_repository
  numGpus: 1

deploy/fleetcommand/Chart.yaml

+1 -1

@@ -26,7 +26,7 @@

apiVersion: v1
# appVersion is the Triton version; update when changing release
-appVersion: "2.55.0"
+appVersion: "2.56.0"
description: Triton Inference Server (Fleet Command)
name: triton-inference-server
# version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml

+3 -3

@@ -27,7 +27,7 @@
replicaCount: 1

image:
-  imageName: nvcr.io/nvidia/tritonserver:25.02-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
  pullPolicy: IfNotPresent
  numGpus: 1
  serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
  #
  # To set model control mode, uncomment and configure below
  # TODO: Fix the following url, it is invalid
-  # See https://github.com/triton-inference-server/server/blob/r25.02/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r25.03/docs/model_management.md
  # for more details
  #- --model-control-mode=explicit|poll|none
  #
  # Additional server args
  #
-  # see https://github.com/triton-inference-server/server/blob/r25.02/README.md
+  # see https://github.com/triton-inference-server/server/blob/r25.03/README.md
  # for more details

service:
