diff --git a/examples/confidential_gpu/README.md b/examples/confidential_gpu/README.md new file mode 100644 index 00000000..f0ea9050 --- /dev/null +++ b/examples/confidential_gpu/README.md @@ -0,0 +1,32 @@ +# confidential computing with GPU + +This is an example of a VM with GPU, using confidential computing, +encrypted disk using a multiregion (US by default) Cloud HSM key +and a custom service account with cloud-platform scope. It creates +the VM with a startup script that installs Nvidia H100 drivers and +enables confidential computing on the GPU. + + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| key | Key name. | `string` | n/a | yes | +| keyring | Keyring name. | `string` | n/a | yes | +| location | Location for the resources (keyring, key, network, etc.). | `string` | `"us"` | no | +| project\_id | The Google Cloud project ID. | `string` | n/a | yes | +| region | The GCP region to create and test resources in. | `string` | `"us-central1"` | no | +| service\_account\_roles | Predefined roles for the Service account that will be created for the VM. Remember to follow principles of least privileges with Cloud IAM. | `list(string)` | `[]` | no | +| subnetwork | The subnetwork selflink to host the compute instances in. | `string` | n/a | yes | +| suffix | A suffix to be used as an identifier for resources. (e.g., suffix for KMS Key, Keyring). | `string` | `""` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| instance\_self\_link | Self-link for compute instance. | +| name | Name of the instance templates. | +| self\_link | Self-link to the instance template. | +| suffix | Suffix used as an identifier for resources. 
|
+
+
diff --git a/examples/confidential_gpu/confidential_gpu_activator.sh b/examples/confidential_gpu/confidential_gpu_activator.sh
new file mode 100644
index 00000000..aca33881
--- /dev/null
+++ b/examples/confidential_gpu/confidential_gpu_activator.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script is used to activate Confidential GPU on a Google Cloud VM instance.
+# It installs necessary packages, GPU drivers, and configures the Linux Kernel Crypto API (LKCA).
+# It is executed on every boot: parts 1 and 2 run only once (tracked by flag files and each
+# followed by a reboot), while the GPU ready state is set again on every boot.
+
+FLAG_FILE_1="/var/log/confidential-gpu-script-part-1-ran.flag"
+FLAG_FILE_2="/var/log/confidential-gpu-script-part-2-ran.flag"
+
+# Prints an error and exits with status 1, so that the failure shows up in the startup-script
+# logs and the unfinished part is retried on the next boot.
+fail() {
+  echo "ERROR: $1" >&2
+  exit 1
+}
+
+echo "Running startup script to activate Confidential GPU..."
+
+# Check if the flag file exists to determine if part 1 has already run.
+if ! [ -f "$FLAG_FILE_1" ]; then
+  echo "Running part 1 of the startup script..."
+  # Update package list and install necessary packages.
+  sudo apt-get update -y
+  sudo apt-get install -y linux-headers-"$(uname -r)" || fail "Unable to install the kernel headers."
+  sudo apt-get install -y build-essential libxml2 libncurses5-dev pkg-config libvulkan1 gcc-12 || fail "Unable to install the build dependencies."
+
+  # Install GPU drivers.
+  sudo apt-get install -y linux-modules-nvidia-550-server-open-gcp nvidia-driver-550-server-open || fail "Unable to install the GPU drivers."
+
+  # Create a flag file to indicate that the part 1 of the script has run.
+  sudo touch "$FLAG_FILE_1"
+
+  # Reboot the VM instance to load the GPU drivers. The reboot command can return before the
+  # shutdown starts, so exit here to keep part 2 from running while the instance goes down.
+  sudo reboot
+  exit 0
+fi
+
+if ! [ -f "$FLAG_FILE_2" ]; then
+  echo "Running part 2 of the startup script..."
+
+  # Configure a secure communication between the GPU and the GPU driver, by enabling the Linux Kernel Crypto API (LKCA).
+  echo "install nvidia /sbin/modprobe ecdsa_generic; /sbin/modprobe ecdh; /sbin/modprobe --ignore-install nvidia" | sudo tee /etc/modprobe.d/nvidia-lkca.conf
+  sudo update-initramfs -u || fail "Unable to update the initramfs."
+
+  # Enable persistence mode.
+  sudo test -f /usr/lib/systemd/system/nvidia-persistenced.service && sudo sed -i "s/no-persistence-mode/uvm-persistence-mode/g" /usr/lib/systemd/system/nvidia-persistenced.service
+  sudo systemctl daemon-reload
+
+  # Create a flag file to indicate that the part 2 of the script has run.
+  sudo touch "$FLAG_FILE_2"
+
+  # Reboot the VM instance to apply LKCA and persistence mode configurations. Exit right away
+  # so that the GPU is not touched while the instance is shutting down.
+  sudo reboot
+  exit 0
+fi
+
+# Set GPU to ready state after each reboot. The command is safe to repeat, so it is not guarded
+# by a flag file; a failure aborts the script before the success message is logged.
+sudo nvidia-smi conf-compute -srs 1 || fail "Unable to set the GPU ready state."
+
+echo "Confidential GPU activation script has completed successfully."
diff --git a/examples/confidential_gpu/main.tf b/examples/confidential_gpu/main.tf
new file mode 100644
index 00000000..eb5e4ae0
--- /dev/null
+++ b/examples/confidential_gpu/main.tf
@@ -0,0 +1,117 @@
+/**
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+locals {
+  default_suffix = var.suffix == "" ? random_string.suffix.result : "${random_string.suffix.result}-${var.suffix}"
+  key_name       = "${var.key}-${local.default_suffix}"
+}
+
+resource "random_string" "suffix" {
+  length  = 4
+  special = false
+  upper   = false
+}
+
+module "kms" {
+  source  = "terraform-google-modules/kms/google"
+  version = "~> 3.0"
+
+  keyring              = "${var.keyring}-${local.default_suffix}"
+  location             = var.location
+  project_id           = var.project_id
+  keys                 = [local.key_name]
+  purpose              = "ENCRYPT_DECRYPT"
+  key_protection_level = "HSM"
+  prevent_destroy      = false
+}
+
+resource "google_service_account" "default" {
+  project      = var.project_id
+  account_id   = "confidential-gpu-sa"
+  display_name = "Custom SA for confidential VM Instance"
+}
+
+resource "google_project_iam_member" "service_account_roles" {
+  for_each = toset(var.service_account_roles)
+
+  project = var.project_id
+  role    = each.key
+  member  = "serviceAccount:${google_service_account.default.email}"
+}
+
+data "google_project" "project" {
+  project_id = var.project_id
+}
+
+resource "google_compute_address" "ip_address" {
+  name    = "external-ip-${local.default_suffix}"
+  project = var.project_id
+  region  = var.region
+}
+
+locals {
+  access_config = {
+    nat_ip       = google_compute_address.ip_address.address
+    network_tier = "PREMIUM"
+  }
+}
+
+resource "google_kms_crypto_key_iam_binding" "crypto_key" {
+  crypto_key_id = module.kms.keys[local.key_name]
+  role          = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
+  members = [
+    "serviceAccount:service-${data.google_project.project.number}@compute-system.iam.gserviceaccount.com",
+  ]
+}
+
+module "instance_template" {
+  source  = "terraform-google-modules/vm/google//modules/instance_template"
+  version = "~> 13.0"
+
+  region        = var.region
+  project_id    = var.project_id
+  subnetwork    = var.subnetwork
+  access_config = [local.access_config]
+
+  name_prefix                = "confidential-gpu-template"
+  machine_type               = "a3-highgpu-1g"
+  source_image_project       = "ubuntu-os-cloud"
+  source_image               = "ubuntu-2204-lts"
+  enable_confidential_vm     = true
+  confidential_instance_type = "TDX"
+  disk_size_gb               = 20
+  disk_type                  = "pd-ssd"
+  spot                       = true
+
+  startup_script = file("${path.module}/confidential_gpu_activator.sh")
+  service_account = {
+    email  = google_service_account.default.email
+    scopes = ["cloud-platform"]
+  }
+  # Read the key ID through the IAM binding so that the Compute Engine service agent is granted access before the encrypted disk is created.
+  disk_encryption_key = google_kms_crypto_key_iam_binding.crypto_key.crypto_key_id
+}
+
+module "compute_instance" {
+  source  = "terraform-google-modules/vm/google//modules/compute_instance"
+  version = "~> 13.0"
+
+  region              = var.region
+  access_config       = [local.access_config]
+  hostname            = "confidential-gpu-instance"
+  instance_template   = module.instance_template.self_link
+  deletion_protection = false
+}
diff --git a/examples/confidential_gpu/outputs.tf b/examples/confidential_gpu/outputs.tf
new file mode 100644
index 00000000..6c321329
--- /dev/null
+++ b/examples/confidential_gpu/outputs.tf
@@ -0,0 +1,36 @@
+# /**
+#  * Copyright 2025 Google LLC
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+
+output "self_link" {
+  description = "Self-link to the instance template."
+  value       = module.instance_template.self_link
+}
+
+output "name" {
+  description = "Name of the instance templates."
+  value       = module.instance_template.name
+}
+
+output "instance_self_link" {
+  description = "Self-link for compute instance."
+ value = module.compute_instance.instances_self_links[0] +} + +output "suffix" { + description = "Suffix used as an identifier for resources." + value = local.default_suffix +} diff --git a/examples/confidential_gpu/variables.tf b/examples/confidential_gpu/variables.tf new file mode 100644 index 00000000..6fe70f28 --- /dev/null +++ b/examples/confidential_gpu/variables.tf @@ -0,0 +1,59 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "project_id" { + description = "The Google Cloud project ID." + type = string +} + +variable "region" { + description = "The GCP region to create and test resources in." + type = string + default = "us-central1" +} + +variable "subnetwork" { + description = "The subnetwork selflink to host the compute instances in." + type = string +} + +variable "location" { + description = "Location for the resources (keyring, key, network, etc.)." + type = string + default = "us" +} + +variable "suffix" { + description = "A suffix to be used as an identifier for resources. (e.g., suffix for KMS Key, Keyring)." + type = string + default = "" +} + +variable "keyring" { + description = "Keyring name." + type = string +} + +variable "key" { + description = "Key name." + type = string +} + +variable "service_account_roles" { + description = "Predefined roles for the Service account that will be created for the VM. Remember to follow principles of least privileges with Cloud IAM." 
+ type = list(string) + default = [] +} diff --git a/metadata.yaml b/metadata.yaml index 42fb4313..dabd0086 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -54,6 +54,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -101,9 +103,11 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com diff --git a/modules/compute_disk_snapshot/metadata.yaml b/modules/compute_disk_snapshot/metadata.yaml index 25ea2111..3b2f3649 100644 --- a/modules/compute_disk_snapshot/metadata.yaml +++ b/modules/compute_disk_snapshot/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -167,12 +169,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google version: ">= 3.71, < 7" diff --git a/modules/compute_instance/metadata.yaml b/modules/compute_instance/metadata.yaml index f31909a1..a59f4f4f 100644 --- a/modules/compute_instance/metadata.yaml +++ 
b/modules/compute_instance/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -178,12 +180,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google version: ">= 3.88, < 7" diff --git a/modules/instance_template/metadata.yaml b/modules/instance_template/metadata.yaml index 7eaa79b2..1389b2c9 100644 --- a/modules/instance_template/metadata.yaml +++ b/modules/instance_template/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -485,12 +487,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google-beta version: ">= 5.36, < 7" diff --git a/modules/mig/metadata.yaml b/modules/mig/metadata.yaml index a7745d16..dadeaf0f 100644 --- a/modules/mig/metadata.yaml +++ b/modules/mig/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: 
confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -322,12 +324,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google version: ">= 4.48, < 7" diff --git a/modules/mig_with_percent/metadata.yaml b/modules/mig_with_percent/metadata.yaml index 77dd4f69..bbbc237f 100644 --- a/modules/mig_with_percent/metadata.yaml +++ b/modules/mig_with_percent/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -309,12 +311,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google version: ">= 4.48, < 7" diff --git a/modules/preemptible_and_regular_instance_templates/metadata.yaml b/modules/preemptible_and_regular_instance_templates/metadata.yaml index b37967e8..eca1df8a 100644 --- a/modules/preemptible_and_regular_instance_templates/metadata.yaml +++ b/modules/preemptible_and_regular_instance_templates/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - 
name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -209,9 +211,11 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com diff --git a/modules/umig/metadata.yaml b/modules/umig/metadata.yaml index 40b519b3..50fe6809 100644 --- a/modules/umig/metadata.yaml +++ b/modules/umig/metadata.yaml @@ -44,6 +44,8 @@ spec: location: examples/instance_template/confidential_computing - name: confidential_computing_intel location: examples/confidential_computing_intel + - name: confidential_gpu + location: examples/confidential_gpu - name: disk_snapshot location: examples/compute_instance/disk_snapshot - name: encrypted_disks @@ -186,12 +188,14 @@ spec: - roles/iam.serviceAccountAdmin - roles/compute.instanceAdmin - roles/resourcemanager.projectIamAdmin + - roles/cloudkms.admin services: - cloudresourcemanager.googleapis.com - storage-api.googleapis.com - serviceusage.googleapis.com - compute.googleapis.com - iam.googleapis.com + - cloudkms.googleapis.com providerVersions: - source: hashicorp/google version: ">= 3.88, < 7" diff --git a/test/fixtures/confidential_gpu/main.tf b/test/fixtures/confidential_gpu/main.tf new file mode 100644 index 00000000..dfe4852a --- /dev/null +++ b/test/fixtures/confidential_gpu/main.tf @@ -0,0 +1,25 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "confidential_gpu" { + source = "../../../examples/confidential_gpu" + project_id = var.project_id + region = "us-east5" + subnetwork = google_compute_subnetwork.main.self_link + keyring = "key-ring-test" + key = "key-test" + service_account_roles = ["roles/compute.imageUser", "roles/compute.networkUser"] +} diff --git a/test/fixtures/confidential_gpu/network.tf b/test/fixtures/confidential_gpu/network.tf new file mode 100644 index 00000000..298defd0 --- /dev/null +++ b/test/fixtures/confidential_gpu/network.tf @@ -0,0 +1,48 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +resource "random_string" "suffix" { + length = 4 + special = "false" + upper = "false" +} + +resource "google_compute_network" "main" { + project = var.project_id + name = "cft-vm-test-${random_string.suffix.result}" + auto_create_subnetworks = "false" +} + +resource "google_compute_firewall" "allow_ssh" { + project = var.project_id + name = "allow-ssh" + network = google_compute_network.main.self_link + + source_ranges = ["0.0.0.0/0"] + + allow { + protocol = "tcp" + ports = ["22"] + } +} + +resource "google_compute_subnetwork" "main" { + project = var.project_id + region = "us-east5" + name = "cft-vm-test-${random_string.suffix.result}" + ip_cidr_range = "10.128.0.0/20" + network = google_compute_network.main.self_link +} diff --git a/test/fixtures/confidential_gpu/outputs.tf b/test/fixtures/confidential_gpu/outputs.tf new file mode 100644 index 00000000..02eae168 --- /dev/null +++ b/test/fixtures/confidential_gpu/outputs.tf @@ -0,0 +1,40 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "self_link" { + description = "Self-link to the instance template." + value = module.confidential_gpu.self_link +} + +output "name" { + description = "Name of the instance templates." 
+ value = module.confidential_gpu.name +} + +output "instance_self_link" { + description = "Self-link for compute instance" + value = module.confidential_gpu.instance_self_link +} + +output "project_id" { + description = "The GCP project to use for integration tests." + value = var.project_id +} + +output "suffix" { + description = "Suffix used as an identifier for resources." + value = module.confidential_gpu.suffix +} diff --git a/test/fixtures/confidential_gpu/variables.tf b/test/fixtures/confidential_gpu/variables.tf new file mode 100644 index 00000000..5d5c828e --- /dev/null +++ b/test/fixtures/confidential_gpu/variables.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "project_id" { + description = "The GCP project to use for integration tests." + type = string +} diff --git a/test/fixtures/confidential_gpu/versions.tf b/test/fixtures/confidential_gpu/versions.tf new file mode 100644 index 00000000..3c1e28d6 --- /dev/null +++ b/test/fixtures/confidential_gpu/versions.tf @@ -0,0 +1,19 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+terraform {
+  required_version = ">=0.13"
+}
diff --git a/test/integration/confidential_gpu/confidential_gpu_test.go b/test/integration/confidential_gpu/confidential_gpu_test.go
new file mode 100644
index 00000000..5b6d3000
--- /dev/null
+++ b/test/integration/confidential_gpu/confidential_gpu_test.go
@@ -0,0 +1,107 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package confidential_gpu
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/GoogleCloudPlatform/cloud-foundation-toolkit/infra/blueprint-test/pkg/gcloud"
+	"github.com/GoogleCloudPlatform/cloud-foundation-toolkit/infra/blueprint-test/pkg/tft"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestConfidentialGpu applies the confidential_gpu fixture, checks the instance configuration and
+// waits for the startup script to finish before verifying the GPU confidential computing state.
+func TestConfidentialGpu(t *testing.T) {
+	// successMarker is the line logged by the startup script when it finishes; maxAttempts bounds
+	// the number of times the startup-script logs are polled for it.
+	const (
+		instanceNamePrefix = "confidential-gpu-instance"
+		successMarker      = "startup-script: Confidential GPU activation script has completed successfully."
+		maxAttempts        = 100
+	)
+
+	confCompInst := tft.NewTFBlueprintTest(t)
+	confCompInst.DefineVerify(func(assert *assert.Assertions) {
+		confCompInst.DefaultVerify(assert)
+		projectID := confCompInst.GetStringOutput("project_id")
+
+		computeInstances := gcloud.Run(t, fmt.Sprintf("compute instances list --format=json --project %s --filter name~%s", projectID, instanceNamePrefix)).Array()
+		// The assertions are non-fatal, so stop here instead of indexing a list of unexpected length.
+		if !assert.Len(computeInstances, 1) {
+			return
+		}
+		computeInstance := computeInstances[0]
+		confidentialInstanceConfig := computeInstance.Get("confidentialInstanceConfig")
+		assert.True(confidentialInstanceConfig.Get("enableConfidentialCompute").Bool())
+		assert.Equal("TDX", confidentialInstanceConfig.Get("confidentialInstanceType").String())
+		assert.Equal("TERMINATE", computeInstance.Get("scheduling").Get("onHostMaintenance").String())
+
+		serviceAccounts := computeInstance.Get("serviceAccounts").Array()
+		if !assert.Len(serviceAccounts, 1) {
+			return
+		}
+		serviceAccountEmail := serviceAccounts[0].Get("email").String()
+		assert.Equal(fmt.Sprintf("confidential-gpu-sa@%s.iam.gserviceaccount.com", projectID), serviceAccountEmail)
+		serviceAccountBindings := gcloud.Runf(t, "projects get-iam-policy %s --flatten bindings --filter bindings.members:'serviceAccount:%s' --format json", projectID, serviceAccountEmail).Array()
+		if assert.Len(serviceAccountBindings, 2, "expect two bindings") {
+			assert.ElementsMatch([]string{"roles/compute.imageUser", "roles/compute.networkUser"}, []string{serviceAccountBindings[0].Get("bindings.role").String(), serviceAccountBindings[1].Get("bindings.role").String()})
+		}
+
+		disks := computeInstance.Get("disks").Array()
+		if !assert.Len(disks, 3) {
+			return
+		}
+		defaultSuffix := confCompInst.GetStringOutput("suffix")
+		assert.Equal(fmt.Sprintf("projects/%s/locations/us/keyRings/key-ring-test-%s/cryptoKeys/key-test-%s/cryptoKeyVersions/1", projectID, defaultSuffix, defaultSuffix), disks[0].Get("diskEncryptionKey").Get("kmsKeyName").String())
+
+		instanceName := computeInstance.Get("name").String()
+		fullZoneName := strings.Split(computeInstance.Get("zone").String(), "/")
+		zone := fullZoneName[len(fullZoneName)-1]
+
+		// Poll the startup-script logs until the activation script reports that it has finished.
+		logsCommand := fmt.Sprintf("compute ssh %s --project %s --zone %s -q --command='journalctl -u google-startup-scripts.service -n 20'", instanceName, projectID, zone)
+		completed := false
+		for attempt := 1; attempt <= maxAttempts && !completed; attempt++ {
+			logs, err := gcloud.RunCmdE(t, logsCommand)
+			switch {
+			case err != nil:
+				// Expected from time to time, e.g. while the instance reboots between script parts.
+				t.Logf("unable to retrieve logs from the instance (attempt %d/%d), retrying: %v", attempt, maxAttempts, err)
+				time.Sleep(20 * time.Second)
+			case strings.Contains(logs, successMarker):
+				if strings.Contains(logs, "startup-script exit status 1") {
+					t.Fatal("ERROR: Startup Script finished with invalid exit status.")
+				}
+				completed = true
+			default:
+				time.Sleep(12 * time.Second)
+			}
+		}
+		// Checked after the loop so that a failed final attempt cannot skip the timeout failure.
+		if !completed {
+			t.Fatal("ERROR: Startup Script did not complete successfully within the expected time frame.")
+		}
+
+		ccStatusLogs := gcloud.RunCmd(t, fmt.Sprintf("compute ssh %s --project %s --zone %s -q --command='sudo nvidia-smi conf-compute -f'", instanceName, projectID, zone))
+		assert.Contains(ccStatusLogs, "CC status: ON")
+		ccStateLogs := gcloud.RunCmd(t, fmt.Sprintf("compute ssh %s --project %s --zone %s -q --command='sudo nvidia-smi conf-compute -grs'", instanceName, projectID, zone))
+		assert.Contains(ccStateLogs, "Confidential Compute GPUs Ready state: ready")
+	})
+	confCompInst.Test()
+}
diff --git a/test/setup/iam.tf b/test/setup/iam.tf
index 64c1d6e0..0de6fb2b 100644
--- a/test/setup/iam.tf
+++ b/test/setup/iam.tf
@@ -22,6 +22,7 @@ locals {
"roles/iam.serviceAccountAdmin", "roles/compute.instanceAdmin", "roles/resourcemanager.projectIamAdmin", + "roles/cloudkms.admin", ] } diff --git a/test/setup/main.tf b/test/setup/main.tf index f209e293..3a2d1b6b 100644 --- a/test/setup/main.tf +++ b/test/setup/main.tf @@ -30,5 +30,6 @@ module "project_ci_vm" { "serviceusage.googleapis.com", "compute.googleapis.com", "iam.googleapis.com", + "cloudkms.googleapis.com", ] }