1 change: 1 addition & 0 deletions infra/gcp/terraform/k8s-infra-prow-build/iam.tf
@@ -32,6 +32,7 @@ module "iam" {
"roles/secretmanager.secretAccessor" = [
"serviceAccount:kubernetes-external-secrets@k8s-infra-prow-build.iam.gserviceaccount.com",
"principal://iam.googleapis.com/projects/${module.project.project_number}/locations/global/workloadIdentityPools/${module.project.project_id}.svc.id.goog/subject/ns/external-secrets/sa/external-secrets",
"principal://iam.googleapis.com/projects/180382678033/locations/global/workloadIdentityPools/k8s-infra-prow-build-trusted.svc.id.goog/subject/ns/external-secrets/sa/external-secrets",
]
}
}
85 changes: 25 additions & 60 deletions infra/gcp/terraform/k8s-infra-prow-build/main.tf
@@ -60,14 +60,6 @@ resource "google_project_iam_member" "k8s_infra_prow_viewers" {
member = "group:[email protected]"
}

// Allow prow-deployer service account in k8s-infra-prow-build-trusted to deploy
// to the cluster defined in here
resource "google_project_iam_member" "prow_deployer_for_prow_build" {
project = module.project.project_id
role = "roles/container.admin"
member = "serviceAccount:prow-deployer@k8s-infra-prow-build-trusted.iam.gserviceaccount.com"
}

module "prow_build_cluster" {
source = "../modules/gke-cluster"
project_name = module.project.project_id
@@ -86,18 +78,30 @@ module "prow_build_nodepool_c4_highmem_8_localssd" {
cluster_name = module.prow_build_cluster.cluster.name
location = module.prow_build_cluster.cluster.location
node_locations = [
"us-central1-a",
"us-central1-b",
"us-central1-c",
"us-central1-f",
]
name = "pool6"
initial_count = 1
min_count = 1
max_count = 80
machine_type = "c4-highmem-8"
disk_size_gb = 500
disk_type = "hyperdisk-balanced"
service_account = module.prow_build_cluster.cluster_node_sa.email
name = "pool6"
initial_count = 1
min_count = 1
max_count = 250 # total across all zones
machine_type = "c4-highmem-8-lssd"
disk_size_gb = 100
disk_type = "hyperdisk-balanced"
enable_nested_virtualization = true
Review thread on enable_nested_virtualization:

Member: What's the use case? Nested virt can have poor performance, create noise for neighbors, and we need to make sure the VMs are not leaked ...

Member (Author): …

Member: ACK, in kind we also use actions for this, which is mostly ~fine.

service_account = module.prow_build_cluster.cluster_node_sa.email
// This taint exists to bias workloads onto the C4D nodepool first; if we can't secure a C4D node
// then we schedule onto a C4 node. C4D performs better than C4 but is capacity constrained at times.
// Also, nested virt doesn't work on C4D or C4A. (A toleration sketch follows this module block.)
taints = [
{
key = "spare"
value = "true"
effect = "PREFER_NO_SCHEDULE"
}
]
}
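
For reference: PREFER_NO_SCHEDULE is a soft taint, so jobs can still land on these C4 nodes with no toleration when the C4D pool is out of capacity; the scheduler merely prefers untainted nodes. A workload that should ignore the bias entirely can tolerate the taint. A minimal sketch using the Terraform kubernetes provider (the pod name and image here are hypothetical):

resource "kubernetes_pod" "spare_tolerant" {
  metadata {
    name = "example-build-pod" # hypothetical
  }
  spec {
    container {
      name  = "main"
      image = "registry.k8s.io/pause:3.9" # hypothetical placeholder
    }
    # Treat "spare"-tainted C4 nodes the same as untainted C4D nodes.
    toleration {
      key      = "spare"
      operator = "Equal"
      value    = "true"
      effect   = "PreferNoSchedule"
    }
  }
}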

module "prow_build_nodepool_c4d_highmem_8_localssd" {
@@ -113,53 +117,13 @@ module "prow_build_nodepool_c4d_highmem_8_localssd" {
name = "pool7"
initial_count = 1
min_count = 10
max_count = 80
machine_type = "c4d-highmem-8-lssd" # has 2 local ssd disks attached
max_count = 250 # total across all zones
machine_type = "c4d-highmem-8-lssd" # has 1 local ssd disks attached
disk_size_gb = 100
disk_type = "hyperdisk-balanced"
service_account = module.prow_build_cluster.cluster_node_sa.email
}


module "sig_node_node_pool_1_n4_highmem_8" {

source = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/gke-nodepool?ref=v39.0.0&depth=1"
project_id = module.project.project_id
name = "sig-node-pool1"
location = module.prow_build_cluster.cluster.location
cluster_name = module.prow_build_cluster.cluster.name

service_account = {
email = module.prow_build_cluster.cluster_node_sa.email
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
}

nodepool_config = {
autoscaling = {
max_node_count = 10
min_node_count = 1 # 1 per zone
}
management = {
auto_repair = true
auto_upgrade = true
}
}

node_config = {
machine_type = "n4-highmem-8"
disk_type = "hyperdisk-balanced"
image_type = "COS_CONTAINERD"
gvnic = true
workload_metadata_config_mode = "GKE_METADATA"
shielded_instance_config = {
enable_secure_boot = true
}
}


taints = { dedicated = { value = "sig-node", effect = "NO_SCHEDULE" } }
}

module "prow_build_nodepool_c4a_highmem_8_localssd" {
source = "../modules/gke-nodepool"
project_name = module.project.project_id
@@ -169,11 +133,12 @@ module "prow_build_nodepool_c4a_highmem_8_localssd" {
"us-central1-a",
"us-central1-b",
"us-central1-c",
"us-central1-f",
]
name = "pool7-arm64"
initial_count = 1
min_count = 1
max_count = 10
min_count = 3
max_count = 100 # total across all zones
machine_type = "c4a-highmem-8-lssd" # has 2 local ssd disks attached
disk_size_gb = 100
disk_type = "hyperdisk-balanced"
12 changes: 7 additions & 5 deletions infra/gcp/terraform/k8s-infra-prow-build/peering.tf
@@ -15,9 +15,11 @@ limitations under the License.
*/

resource "google_vmwareengine_network_peering" "gvce_peering" {
name = "peer-with-gcve-project"
peer_network = "projects/k8s-infra-prow-build/global/networks/default"
project = module.project.project_id
peer_network_type = "STANDARD"
vmware_engine_network = "projects/broadcom-451918/locations/global/vmwareEngineNetworks/k8s-gcp-gcve-network"
name = "peer-with-gcve-project"
peer_network = "projects/k8s-infra-prow-build/global/networks/default"
project = module.project.project_id
peer_network_type = "STANDARD"
vmware_engine_network = "projects/broadcom-451918/locations/global/vmwareEngineNetworks/k8s-gcp-gcve-network"
export_custom_routes_with_public_ip = true
import_custom_routes_with_public_ip = true
}
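
For context: the two added flags exchange custom routes whose destination ranges use public IPs across the peering, in both directions; presumably the GCVE network advertises publicly addressed ranges that this VPC needs to reach, and vice versa.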
@@ -30,11 +30,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.31.0"
version = "~> 7.7.0"
}
google-beta = {
source = "hashicorp/google-beta"
version = "~> 6.31.0"
version = "~> 7.7.0"
}
}
}
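
A note on the differing constraints: this project-level configuration pins a single provider series with a pessimistic constraint (~> 7.7.0), while the shared modules below only set a floor (>=6.31.0), presumably so the same modules remain usable from root configurations on either the 6.x or 7.x provider.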
4 changes: 2 additions & 2 deletions infra/gcp/terraform/modules/gke-cluster/versions.tf
@@ -20,11 +20,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.31.0"
version = ">=6.31.0"
}
google-beta = {
source = "hashicorp/google-beta"
version = "~> 6.31.0"
version = ">=6.31.0"
}
}
}
19 changes: 17 additions & 2 deletions infra/gcp/terraform/modules/gke-nodepool/main.tf
@@ -31,11 +31,17 @@ resource "google_container_node_pool" "node_pool" {
auto_upgrade = true
}

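// Surge-only upgrades: bring up as many as 10 replacement nodes at a time
// and never reduce ready capacity while nodes are rolled.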
upgrade_settings {
max_unavailable = 0
max_surge = 10
}

// Autoscale the node pool as needed. With total_* limits and location_policy "ANY",
// the min/max counts apply across all zones rather than per zone.
initial_node_count = var.initial_count
autoscaling {
min_node_count = var.min_count
max_node_count = var.max_count
total_min_node_count = var.min_count
total_max_node_count = var.max_count
location_policy = "ANY"
}
node_locations = var.node_locations

@@ -49,6 +55,9 @@ resource "google_container_node_pool" "node_pool" {

service_account = var.service_account
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
kubelet_config {
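// On OOM, kill only the offending process rather than the whole container cgroup.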
single_process_oom_kill = true # https://github.com/kubernetes-sigs/prow/issues/210
}

dynamic "ephemeral_storage_config" {
for_each = var.ephemeral_local_ssd_count > 0 ? [var.ephemeral_local_ssd_count] : []
@@ -57,6 +66,11 @@ resource "google_container_node_pool" "node_pool" {
}
}

advanced_machine_features {
enable_nested_virtualization = var.enable_nested_virtualization
threads_per_core = 0
}

// Needed for workload identity
workload_metadata_config {
mode = "GKE_METADATA"
@@ -72,6 +86,7 @@ resource "google_container_node_pool" "node_pool" {
value = taint.value.value
}
}

}

// If we need to destroy the node pool, create the new one before destroying
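
Presumably that comment pairs with Terraform's standard pattern for zero-downtime replacement; a minimal sketch of what such a block usually looks like:

lifecycle {
  create_before_destroy = true
}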
6 changes: 6 additions & 0 deletions infra/gcp/terraform/modules/gke-nodepool/variables.tf
@@ -107,3 +107,9 @@ variable "service_account" {
description = "The email address of the GCP Service Account to be associated with nodes in this node_pool"
type = string
}

variable "enable_nested_virtualization" {
description = "Whether to enable nested virtualization on the node pool's VMs"
type = bool
default = false
}
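
For illustration, a minimal sketch of how a caller might opt in, mirroring the module calls in k8s-infra-prow-build/main.tf (the pool name and sizing here are hypothetical; the flag defaults to false, and per the comments in main.tf nested virt doesn't work on C4D or C4A machine families):

module "nested_virt_pool" {
  source          = "../modules/gke-nodepool"
  project_name    = module.project.project_id
  cluster_name    = module.prow_build_cluster.cluster.name
  location        = module.prow_build_cluster.cluster.location
  node_locations  = ["us-central1-b"] # hypothetical
  name            = "pool-nested" # hypothetical
  initial_count   = 1
  min_count       = 1
  max_count       = 10
  machine_type    = "c4-highmem-8" # C4 supports nested virtualization
  disk_size_gb    = 100
  disk_type       = "hyperdisk-balanced"
  service_account = module.prow_build_cluster.cluster_node_sa.email
  enable_nested_virtualization = true
}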
4 changes: 2 additions & 2 deletions infra/gcp/terraform/modules/gke-nodepool/versions.tf
@@ -20,11 +20,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.31.0"
version = ">=6.31.0"
}
google-beta = {
source = "hashicorp/google-beta"
version = "~> 6.31.0"
version = ">=6.31.0"
}
}
}
4 changes: 2 additions & 2 deletions infra/gcp/terraform/modules/gke-project/versions.tf
@@ -20,11 +20,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.31.0"
version = ">=6.31.0"
}
google-beta = {
source = "hashicorp/google-beta"
version = "~> 6.31.0"
version = ">=6.31.0"
}
}
}
@@ -17,11 +17,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.31.0"
version = ">=6.31.0"
}
google-beta = {
source = "hashicorp/google-beta"
version = "~> 6.31.0"
version = ">=6.31.0"
}
}
}