diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml
index 7fb2011..5347258 100644
--- a/charts/llm-d/Chart.yaml
+++ b/charts/llm-d/Chart.yaml
@@ -1,7 +1,7 @@
apiVersion: v2
name: llm-d
type: application
-version: 1.0.20
+version: 1.0.21
appVersion: "0.1"
icon: 
description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework
diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md
index 44109fa..dd1c260 100644
--- a/charts/llm-d/README.md
+++ b/charts/llm-d/README.md
@@ -1,7 +1,7 @@
# llm-d Helm Chart
-
+

llm-d is a Kubernetes-native high-performance distributed LLM inference framework
@@ -194,7 +194,7 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.epp.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.epp.image.registry | Endpoint picker image registry | string | `"ghcr.io"` |
| modelservice.epp.image.repository | Endpoint picker image repository | string | `"llm-d/llm-d-inference-scheduler"` |
-| modelservice.epp.image.tag | Endpoint picker image tag | string | `"0.0.4"` |
+| modelservice.epp.image.tag | Endpoint picker image tag | string | `"v0.1.0"` |
| modelservice.epp.metrics | Enable metrics gathering via podMonitor / ServiceMonitor | object | `{"enabled":true,"serviceMonitor":{"annotations":{},"interval":"10s","labels":{},"namespaceSelector":{"any":false,"matchNames":[]},"path":"/metrics","port":"metrics","selector":{"matchLabels":{}}}}` |
| modelservice.epp.metrics.enabled | Enable metrics scraping from endpoint picker service | bool | `true` |
| modelservice.epp.metrics.serviceMonitor | Prometheus ServiceMonitor configuration <br /> Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md | object | See below |
@@ -215,7 +215,7 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.image.registry | Model Service controller image registry | string | `"ghcr.io"` |
| modelservice.image.repository | Model Service controller image repository | string | `"llm-d/llm-d-model-service"` |
-| modelservice.image.tag | Model Service controller image tag | string | `"0.0.10"` |
+| modelservice.image.tag | Model Service controller image tag | string | `"v0.0.15"` |
| modelservice.inferenceSimulator | llm-d inference simulator container options | object | See below |
| modelservice.inferenceSimulator.containerSecurityContext | Security settings for a Container. <br /> Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` |
| modelservice.inferenceSimulator.image | llm-d inference simulator image used in ModelService CR presets | object | See below |
@@ -253,12 +253,12 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.replicas | Number of controller replicas | int | `1` |
| modelservice.routingProxy | Routing proxy container options | object | See below |
| modelservice.routingProxy.containerSecurityContext | Security settings for a Container. <br /> Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` |
-| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.6"}` |
+| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.7"}` |
| modelservice.routingProxy.image.imagePullPolicy | Specify a imagePullPolicy | string | `"IfNotPresent"` |
| modelservice.routingProxy.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.routingProxy.image.registry | Routing proxy image registry | string | `"ghcr.io"` |
| modelservice.routingProxy.image.repository | Routing proxy image repository | string | `"llm-d/llm-d-routing-sidecar"` |
-| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.6"` |
+| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.7"` |
| modelservice.service.enabled | Toggle to deploy a Service resource for Model service controller | bool | `true` |
| modelservice.service.port | Port number exposed from Model Service controller | int | `8443` |
| modelservice.service.type | Service type | string | `"ClusterIP"` |
diff --git a/charts/llm-d/templates/modelservice/deployment.yaml b/charts/llm-d/templates/modelservice/deployment.yaml
index 2331650..854a1ec 100644
--- a/charts/llm-d/templates/modelservice/deployment.yaml
+++ b/charts/llm-d/templates/modelservice/deployment.yaml
@@ -58,7 +58,11 @@ spec:
{{- include "common.tplvalues.render" ( dict "value" .Values.modelservice.tolerations "context" $) | nindent 8 }}
{{- end }}
containers:
- - args:
+ - name: manager
+ command:
+ - /manager
+ - run
+ args:
- --leader-elect=false
- --health-probe-bind-address=:8081
- --epp-cluster-role
@@ -67,8 +71,6 @@ spec:
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.epp.image) "context" $) }}
- --pd-pull-secrets
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.vllm.image) "context" $) }}
- command:
- - /manager
image: {{ include "modelservice.image" . }}
imagePullPolicy: {{ .Values.modelservice.image.imagePullPolicy }}
{{- if .Values.modelservice.containerSecurityContext }}
@@ -81,7 +83,6 @@ spec:
port: 8081
initialDelaySeconds: 15
periodSeconds: 20
- name: manager
readinessProbe:
httpGet:
path: /readyz
diff --git a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
index 33425ff..45d3650 100644
--- a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
+++ b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
@@ -18,6 +18,16 @@ metadata:
{{- include "common.tplvalues.render" ( dict "value" .Values.modelservice.annotations "context" $) | nindent 4 }}
{{- end }}
rules:
+- apiGroups:
+ - gateway.networking.k8s.io
+ resources:
+ - gatewayclasses
+ - gateways
+ - httproutes
+ verbs:
+ - get
+ - list
+ - watch
- apiGroups:
- ""
resources:
diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json
index 47a46fa..3b369ed 100644
--- a/charts/llm-d/values.schema.json
+++ b/charts/llm-d/values.schema.json
@@ -4025,7 +4025,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.4",
+ "default": "v0.1.0",
"description": "Endpoint picker image tag",
"required": [],
"title": "tag"
@@ -4491,7 +4491,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.10",
+ "default": "v0.0.15",
"description": "Model Service controller image tag",
"required": [],
"title": "tag"
@@ -6580,7 +6580,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.6",
+ "default": "0.0.7",
"description": "Routing proxy image tag",
"required": [],
"title": "tag"
diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json
index f11d4a4..f037949 100644
--- a/charts/llm-d/values.schema.tmpl.json
+++ b/charts/llm-d/values.schema.tmpl.json
@@ -848,7 +848,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.4",
+ "default": "v0.1.0",
"description": "Endpoint picker image tag",
"required": [],
"title": "tag"
@@ -1018,7 +1018,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.10",
+ "default": "v0.0.15",
"description": "Model Service controller image tag",
"required": [],
"title": "tag"
@@ -1340,7 +1340,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.6",
+ "default": "0.0.7",
"description": "Routing proxy image tag",
"required": [],
"title": "tag"
diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml
index cd62337..d8dc82d 100644
--- a/charts/llm-d/values.yaml
+++ b/charts/llm-d/values.yaml
@@ -372,7 +372,7 @@ modelservice:
repository: llm-d/llm-d-model-service
# -- Model Service controller image tag
- tag: "0.0.10"
+ tag: "v0.0.15"
# -- Specify a imagePullPolicy
imagePullPolicy: "Always"
@@ -449,7 +449,7 @@ modelservice:
repository: llm-d/llm-d-inference-scheduler
# -- Endpoint picker image tag
- tag: 0.0.4
+ tag: "v0.1.0"
# -- Specify a imagePullPolicy
imagePullPolicy: "Always"
@@ -815,7 +815,7 @@ modelservice:
repository: llm-d/llm-d-routing-sidecar
# -- Routing proxy image tag
- tag: "0.0.6"
+ tag: "0.0.7"
# -- Specify a imagePullPolicy
imagePullPolicy: "IfNotPresent"