diff --git a/examples/gke_inference_gateway_standard_cluster/README.md b/examples/gke_inference_gateway_standard_cluster/README.md
new file mode 100644
index 0000000000..40763e5377
--- /dev/null
+++ b/examples/gke_inference_gateway_standard_cluster/README.md
@@ -0,0 +1,79 @@
+# GKE Inference Gateway Example
+
+This example provisions a GKE Standard cluster and a node pool with H100 GPUs, suitable for deploying and serving Large Language Models (LLMs) using the GKE Inference Gateway.
+
+The cluster is configured with:
+- GKE Gateway API enabled.
+- Managed Prometheus for monitoring.
+- DCGM for GPU monitoring.
+- A dedicated node pool with NVIDIA H100 80GB GPUs.
+
+This Terraform script automates the deployment of all necessary Kubernetes resources, including:
+- Authorization for metrics scraping.
+- A vLLM model server for a Llama3.1 model.
+- GKE Inference Gateway CRDs.
+- GKE Inference Gateway resources (`InferencePool`, `InferenceObjective`, `Gateway`, `HTTPRoute`).
+
+## Usage
+
+1.  **Enable APIs**
+
+    ```bash
+    gcloud services enable container.googleapis.com
+    ```
+
+2.  **Set up your environment**
+
+    You will need to set the following environment variables. You may also need to create a `terraform.tfvars` file to provide values for the variables in `variables.tf`.
+
+    ```bash
+    export PROJECT_ID="your-project-id"
+    export REGION="us-central1"
+    export CLUSTER_NAME="inference-gateway-cluster"
+    export HF_TOKEN="your-hugging-face-token"
+    ```
+
+3.  **Run Terraform**
+
+    The `terraform apply` command will provision the GKE cluster and deploy all the necessary Kubernetes resources.
+
+    ```bash
+    terraform init
+    terraform apply
+    ```
+
+4.  **Configure kubectl**
+
+    After the apply is complete, configure `kubectl` to communicate with your new cluster.
+
+    ```bash
+    gcloud container clusters get-credentials $(terraform output -raw cluster_name) --region $(terraform output -raw location)
+    ```
+
+5.  **Send an inference request**
+
+    Get the Gateway IP address:
+    ```bash
+    IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
+    PORT=80
+    ```
+
+    Send a request:
+    ```bash
+    curl -i -X POST http://${IP}:${PORT}/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "food-review",
+        "prompt": "What is a good recipe for a chicken curry?",
+        "max_tokens": 100,
+        "temperature": "0.7"
+    }'
+    ```
+
+## Cleanup
+
+Running `terraform destroy` will deprovision the GKE cluster and all associated Kubernetes resources.
+
+```bash
+terraform destroy
+```
\ No newline at end of file
diff --git a/examples/gke_inference_gateway_standard_cluster/main.tf b/examples/gke_inference_gateway_standard_cluster/main.tf
new file mode 100644
index 0000000000..baba283c16
--- /dev/null
+++ b/examples/gke_inference_gateway_standard_cluster/main.tf
@@ -0,0 +1,530 @@
+/**
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+locals {
+  cluster_type          = "gke-standard"
+  default_workload_pool = "${var.project_id}.svc.id.goog"
+}
+
+data "google_client_config" "default" {}
+
+provider "kubernetes" {
+  host                   = "https://${module.gke.endpoint}"
+  token                  = data.google_client_config.default.access_token
+  cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+}
+
+provider "helm" {
+  kubernetes {
+    host                   = "https://${module.gke.endpoint}"
+    token                  = data.google_client_config.default.access_token
+    cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+  }
+}
+
+data "google_compute_subnetwork" "subnetwork" {
+  name    = var.subnetwork
+  project = var.project_id
+  region  = var.region
+}
+
+module "gke" {
+  source  = "terraform-google-modules/kubernetes-engine/google//modules/gke-standard-cluster"
+  version = "~> 39.0"
+
+  project_id    = var.project_id
+  name       = "${local.cluster_type}-cluster${var.cluster_name_suffix}"
+  location   = var.region
+  network    = var.network
+  subnetwork = var.subnetwork
+  release_channel = "RAPID"
+  gateway_api_config = {
+    channel = "CHANNEL_STANDARD"
+  }
+  monitoring_config = {
+    enable_managed_prometheus = true
+    enabled_components        = ["SYSTEM_COMPONENTS", "DCGM"]
+  }
+  logging_service    = "logging.googleapis.com/kubernetes"
+
+
+  ip_allocation_policy = {
+    cluster_secondary_range_name  = var.ip_range_pods
+    services_secondary_range_name = var.ip_range_services
+  }
+
+  deletion_protection      = false
+  remove_default_node_pool = false
+
+  workload_identity_config = {
+    workload_pool = local.default_workload_pool
+  }
+
+  addons_config = {
+    http_load_balancing = {
+      enabled = true
+    }
+    dns_cache_config = {
+      enabled = var.dns_cache
+    }
+
+    gce_persistent_disk_csi_driver_config = {
+      enabled = var.gce_pd_csi_driver
+    }
+  }
+  enable_shielded_nodes = true
+}
+
+module "node_pool" {
+  source  = "terraform-google-modules/kubernetes-engine/google//modules/gke-node-pool"
+  version = "~> 39.0"
+
+  project_id  = var.project_id
+  location = var.zone
+  cluster  = module.gke.cluster_name
+  name     = "gpupool"
+  node_count = 1
+
+  node_config = {
+    disk_size_gb    = 200
+    disk_type       = "pd-standard"
+    image_type      = "COS_CONTAINERD"
+    machine_type    = "a3-highgpu-2g"
+    service_account = var.service_account
+    guest_accelerator = {
+      type  = "nvidia-h100-80gb"
+      count = 2
+    }
+    gpu_driver_installation_config = {
+      gpu_driver_version = "LATEST"
+    }
+    workload_metadata_config = {
+      mode = "GKE_METADATA"
+    }
+  }
+}
+
+resource "kubernetes_secret" "hf_secret" {
+  metadata {
+    name = "hf-token"
+  }
+  data = {
+    token = var.hf_token
+  }
+  type = "Opaque"
+}
+
+resource "kubernetes_config_map" "vllm_adapters" {
+  metadata {
+    name = "vllm-llama3.1-8b-adapters"
+  }
+  data = {
+    "configmap.yaml" = <<-EOT
+      vLLMLoRAConfig:
+        name: vllm-llama3.1-8b-instruct
+        port: 8000
+        defaultBaseModel: meta-llama/Llama-3.1-8B-Instruct
+        ensureExist:
+          models:
+          - id: food-review
+            source: Kawon/llama3.1-food-finetune_v14_r8
+          - id: cad-fabricator
+            source: redcathode/fabricator
+    EOT
+  }
+}
+
+resource "kubernetes_deployment" "vllm" {
+  metadata {
+    name = "vllm-llama3.1-8b-instruct"
+  }
+  spec {
+    replicas = 3
+    selector {
+      match_labels = {
+        app = "vllm-llama3.1-8b-instruct"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          app = "vllm-llama3.1-8b-instruct"
+        }
+      }
+      spec {
+        termination_grace_period_seconds = 130
+        enable_service_links             = false
+        container {
+          name              = "vllm"
+          image             = "vllm/vllm-openai:latest"
+          image_pull_policy = "Always"
+          command           = ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+          args = [
+            "--model", "meta-llama/Llama-3.1-8B-Instruct",
+            "--tensor-parallel-size", "1",
+            "--port", "8000",
+            "--enable-lora",
+            "--max-loras", "2",
+            "--max-cpu-loras", "12"
+          ]
+          port {
+            container_port = 8000
+            name           = "http"
+            protocol       = "TCP"
+          }
+          env {
+            name  = "VLLM_USE_V1"
+            value = "1"
+          }
+          env {
+            name  = "PORT"
+            value = "8000"
+          }
+          env {
+            name = "HUGGING_FACE_HUB_TOKEN"
+            value_from {
+              secret_key_ref {
+                name = kubernetes_secret.hf_secret.metadata[0].name
+                key  = "token"
+              }
+            }
+          }
+          env {
+            name  = "VLLM_ALLOW_RUNTIME_LORA_UPDATING"
+            value = "true"
+          }
+          lifecycle {
+            pre_stop {
+              exec {
+                command = ["/bin/sh", "-c", "sleep 30"]
+              }
+            }
+          }
+          resources {
+            limits = {
+              "nvidia.com/gpu" = 1
+            }
+            requests = {
+              "nvidia.com/gpu" = 1
+            }
+          }
+          liveness_probe {
+            http_get {
+              path   = "/health"
+              port   = "http"
+              scheme = "HTTP"
+            }
+            period_seconds     = 1
+            success_threshold  = 1
+            failure_threshold  = 5
+            timeout_seconds    = 1
+          }
+          readiness_probe {
+            http_get {
+              path   = "/health"
+              port   = "http"
+              scheme = "HTTP"
+            }
+            period_seconds     = 1
+            success_threshold  = 1
+            failure_threshold  = 1
+            timeout_seconds    = 1
+          }
+          startup_probe {
+            http_get {
+              path   = "/health"
+              port   = "http"
+              scheme = "HTTP"
+            }
+            failure_threshold   = 600
+            initial_delay_seconds = 2
+            period_seconds      = 1
+          }
+          volume_mount {
+            mount_path = "/data"
+            name       = "data"
+          }
+          volume_mount {
+            mount_path = "/dev/shm"
+            name       = "shm"
+          }
+          volume_mount {
+            mount_path = "/adapters"
+            name       = "adapters"
+          }
+        }
+        container {
+          name  = "lora-adapter-syncer"
+          image = "us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main"
+          image_pull_policy = "Always"
+          env {
+            name  = "DYNAMIC_LORA_ROLLOUT_CONFIG"
+            value = "/config/configmap.yaml"
+          }
+          volume_mount {
+            name       = "config-volume"
+            mount_path = "/config"
+          }
+        }
+        volume {
+          name = "data"
+          empty_dir {}
+        }
+        volume {
+          name = "shm"
+          empty_dir {
+            medium = "Memory"
+          }
+        }
+        volume {
+          name = "adapters"
+          empty_dir {}
+        }
+        volume {
+          name = "config-volume"
+          config_map {
+            name = kubernetes_config_map.vllm_adapters.metadata[0].name
+          }
+        }
+        node_selector = {
+          "cloud.google.com/gke-accelerator" = "nvidia-h100-80gb"
+        }
+      }
+    }
+  }
+}
+
+resource "null_resource" "apply_crds" {
+  provisioner "local-exec" {
+    command = "kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.0/manifests.yaml"
+  }
+  depends_on = [module.gke]
+}
+
+resource "kubernetes_cluster_role" "metrics_reader" {
+  metadata {
+    name = "inference-gateway-metrics-reader"
+  }
+  rule {
+    non_resource_urls = ["/metrics"]
+    verbs             = ["get"]
+  }
+}
+
+resource "kubernetes_service_account" "metrics_reader" {
+  metadata {
+    name      = "inference-gateway-sa-metrics-reader"
+    namespace = "default"
+  }
+}
+
+resource "kubernetes_cluster_role_binding" "metrics_reader" {
+  metadata {
+    name = "inference-gateway-sa-metrics-reader-role-binding"
+  }
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "ClusterRole"
+    name      = kubernetes_cluster_role.metrics_reader.metadata[0].name
+  }
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.metrics_reader.metadata[0].name
+    namespace = "default"
+  }
+}
+
+resource "kubernetes_secret" "metrics_reader_token" {
+  metadata {
+    name      = "inference-gateway-sa-metrics-reader-secret"
+    namespace = "default"
+    annotations = {
+      "kubernetes.io/service-account.name" = kubernetes_service_account.metrics_reader.metadata[0].name
+    }
+  }
+  type = "kubernetes.io/service-account-token"
+}
+
+resource "kubernetes_cluster_role" "secret_reader" {
+  metadata {
+    name = "inference-gateway-sa-metrics-reader-secret-read"
+  }
+  rule {
+    api_groups     = [""]
+    resources      = ["secrets"]
+    resource_names = [kubernetes_secret.metrics_reader_token.metadata[0].name]
+    verbs          = ["get", "list", "watch"]
+  }
+}
+
+resource "kubernetes_cluster_role_binding" "gmp_secret_reader" {
+  metadata {
+    name = "gmp-system:collector:inference-gateway-sa-metrics-reader-secret-read"
+  }
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "ClusterRole"
+    name      = kubernetes_cluster_role.secret_reader.metadata[0].name
+  }
+  subject {
+    kind      = "ServiceAccount"
+    name      = "collector"
+    namespace = "gmp-system"
+  }
+}
+
+resource "helm_release" "inference_pool" {
+  name       = "vllm-llama3.1-8b-instruct"
+  repository = "oci://registry.k8s.io/gateway-api-inference-extension/charts"
+  chart      = "inferencepool"
+  version    = "v1.0.0"
+
+  set {
+    name  = "inferencePool.modelServers.matchLabels.app"
+    value = "vllm-llama3.1-8b-instruct"
+  }
+  set {
+    name  = "provider.name"
+    value = "gke"
+  }
+  set {
+    name  = "healthCheckPolicy.create"
+    value = "false"
+  }
+  depends_on = [kubernetes_deployment.vllm, null_resource.apply_crds]
+}
+
+resource "kubernetes_manifest" "food_review_model" {
+  manifest = {
+    "apiVersion" = "inference.networking.k8s.io/v1alpha1"
+    "kind"       = "InferenceObjective"
+    "metadata" = {
+      "name" = "food-review"
+    }
+    "spec" = {
+      "priority" = 10
+      "poolRef" = {
+        "name" = "vllm-llama3.1-8b-instruct"
+        "kind" = "InferencePool"
+      }
+    }
+  }
+  depends_on = [helm_release.inference_pool]
+}
+
+resource "kubernetes_manifest" "base_model" {
+  manifest = {
+    "apiVersion" = "inference.networking.k8s.io/v1alpha1"
+    "kind"       = "InferenceObjective"
+    "metadata" = {
+      "name" = "llama3-base-model"
+    }
+    "spec" = {
+      "priority" = 20
+      "poolRef" = {
+        "name" = "vllm-llama3.1-8b-instruct"
+        "kind" = "InferencePool"
+      }
+    }
+  }
+  depends_on = [helm_release.inference_pool]
+}
+
+resource "kubernetes_manifest" "health_check_policy" {
+  manifest = {
+    "apiVersion" = "networking.gke.io/v1"
+    "kind"       = "HealthCheckPolicy"
+    "metadata" = {
+      "name"      = "health-check-policy"
+      "namespace" = "default"
+    }
+    "spec" = {
+      "targetRef" = {
+        "group" = "inference.networking.k8s.io"
+        "kind"  = "InferencePool"
+        "name"  = "vllm-llama3.1-8b-instruct"
+      }
+      "default" = {
+        "config" = {
+          "type" = "HTTP"
+          "httpHealthCheck" = {
+            "requestPath" = "/health"
+            "port"        = 8000
+          }
+        }
+      }
+    }
+  }
+  depends_on = [helm_release.inference_pool]
+}
+
+resource "kubernetes_manifest" "gateway" {
+  manifest = {
+    "apiVersion" = "gateway.networking.k8s.io/v1"
+    "kind"       = "Gateway"
+    "metadata" = {
+      "name" = "inference-gateway"
+    }
+    "spec" = {
+      "gatewayClassName" = "gke-l7-regional-external-managed"
+      "listeners" = [
+        {
+          "protocol" = "HTTP"
+          "port"     = 80
+          "name"     = "http"
+        }
+      ]
+    }
+  }
+  depends_on = [helm_release.inference_pool]
+}
+
+resource "kubernetes_manifest" "http_route" {
+  manifest = {
+    "apiVersion" = "gateway.networking.k8s.io/v1"
+    "kind"       = "HTTPRoute"
+    "metadata" = {
+      "name" = "my-route"
+    }
+    "spec" = {
+      "parentRefs" = [
+        {
+          "name" = "inference-gateway"
+        }
+      ]
+      "rules" = [
+        {
+          "matches" = [
+            {
+              "path" = {
+                "type"  = "PathPrefix"
+                "value" = "/"
+              }
+            }
+          ]
+          "backendRefs" = [
+            {
+              "name"  = "vllm-llama3.1-8b-instruct"
+              "group" = "inference.networking.k8s.io"
+              "kind"  = "InferencePool"
+            }
+          ]
+        }
+      ]
+    }
+  }
+  depends_on = [kubernetes_manifest.gateway]
+}
\ No newline at end of file
diff --git a/examples/gke_inference_gateway_standard_cluster/outputs.tf b/examples/gke_inference_gateway_standard_cluster/outputs.tf
new file mode 100644
index 0000000000..7f97dde8a9
--- /dev/null
+++ b/examples/gke_inference_gateway_standard_cluster/outputs.tf
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+output "endpoint" {
+  sensitive   = true
+  description = "The cluster endpoint"
+  value       = module.gke.endpoint
+}
+
+output "ca_certificate" {
+  sensitive   = true
+  description = "The cluster ca certificate (base64 encoded)"
+  value       = module.gke.ca_certificate
+}
+
+output "project_id" {
+  description = "The project ID the cluster is in"
+  value       = var.project_id
+}
+
+output "location" {
+  description = "Cluster location"
+  value       = module.gke.location
+}
+
+output "node_locations" {
+  description = "Cluster node locations"
+  value       = module.gke.node_locations
+}
+
+output "addons_config" {
+  description = "The configuration for addons supported by GKE Autopilot."
+  value       = module.gke.addons_config
+}
+
+output "cluster_name" {
+  description = "Cluster name"
+  value       = module.gke.cluster_name
+}
+
+output "master_version" {
+  description = "The master Kubernetes version"
+  value       = module.gke.master_version
+}
diff --git a/examples/gke_inference_gateway_standard_cluster/variables.tf b/examples/gke_inference_gateway_standard_cluster/variables.tf
new file mode 100644
index 0000000000..d0b99efafb
--- /dev/null
+++ b/examples/gke_inference_gateway_standard_cluster/variables.tf
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+variable "project_id" {
+  description = "The project ID to host the cluster in"
+  type        = string
+}
+
+variable "region" {
+  description = "The region to host the cluster in"
+  type        = string
+  default     = "us-central1"
+}
+
+variable "zone" {
+  description = "The zone to host the cluster in"
+  type        = string
+  default     = "us-central1-a"
+}
+
+variable "network" {
+  description = "The VPC network to host the cluster in"
+  type        = string
+}
+
+variable "subnetwork" {
+  description = "The subnetwork to host the cluster in"
+  type        = string
+}
+
+variable "ip_range_pods" {
+  description = "The secondary ip range for pods"
+  type        = string
+}
+
+variable "ip_range_services" {
+  description = "The secondary ip range for services"
+  type        = string
+}
+
+variable "cluster_name_suffix" {
+  description = "A suffix to append to the cluster name"
+  type        = string
+  default     = ""
+}
+
+variable "service_account" {
+  description = "Service account to attach to the node pool."
+  type        = string
+  default     = null
+}
+
+variable "dns_cache" {
+  description = "Enable DNS cache for the cluster"
+  type        = bool
+  default     = false
+}
+
+variable "gce_pd_csi_driver" {
+  description = "Enable GCE Persistent Disk CSI driver"
+  type        = bool
+  default     = true
+}
+
+variable "hf_token" {
+  description = "Hugging Face token"
+  type        = string
+  sensitive   = true
+}
\ No newline at end of file
diff --git a/examples/gke_inference_gateway_standard_cluster/versions.tf b/examples/gke_inference_gateway_standard_cluster/versions.tf
new file mode 100644
index 0000000000..220cbfdb31
--- /dev/null
+++ b/examples/gke_inference_gateway_standard_cluster/versions.tf
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+terraform {
+  required_version = ">= 1.3"
+  required_providers {
+    google = {
+      source = "hashicorp/google"
+    }
+    google-beta = {
+      source = "hashicorp/google-beta"
+    }
+    kubernetes = {
+      source = "hashicorp/kubernetes"
+    }
+  }
+}