diff --git a/community/modules/internal/slurm-gcp/instance_template/README.md b/community/modules/internal/slurm-gcp/instance_template/README.md index 6baf8180e7..f9d999918d 100644 --- a/community/modules/internal/slurm-gcp/instance_template/README.md +++ b/community/modules/internal/slurm-gcp/instance_template/README.md @@ -34,6 +34,7 @@ | [advanced\_machine\_features](#input\_advanced\_machine\_features) | See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_template#nested_advanced_machine_features |
object({
enable_nested_virtualization = optional(bool)
threads_per_core = optional(number)
turbo_mode = optional(string)
visible_core_count = optional(number)
performance_monitoring_unit = optional(string)
enable_uefi_networking = optional(bool)
})
| n/a | yes |
| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `virtio_enabled` setting will only enable VirtioNet and will not enable TIER\_1.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no |
| [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no |
+| [controller\_save\_disk\_self\_link](#input\_controller\_save\_disk\_self\_link) | Name of the disk to attach to the controller instance for saving Slurm state | `string` | `null` | no |
| [disk\_auto\_delete](#input\_disk\_auto\_delete) | Whether or not the boot disk should be auto-deleted. | `bool` | `true` | no |
| [disk\_labels](#input\_disk\_labels) | Labels to be assigned to boot disk, provided as a map. | `map(string)` | `{}` | no |
| [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB. | `number` | `100` | no |
diff --git a/community/modules/internal/slurm-gcp/instance_template/main.tf b/community/modules/internal/slurm-gcp/instance_template/main.tf
index 87a0f4ba56..945e3e3b20 100644
--- a/community/modules/internal/slurm-gcp/instance_template/main.tf
+++ b/community/modules/internal/slurm-gcp/instance_template/main.tf
@@ -88,6 +88,8 @@ module "instance_template" {

   project_id = var.project_id

+  controller_save_disk_self_link = var.controller_save_disk_self_link
+
   # Network
   can_ip_forward = var.can_ip_forward
   network_ip     = var.network_ip
diff --git a/community/modules/internal/slurm-gcp/instance_template/variables.tf b/community/modules/internal/slurm-gcp/instance_template/variables.tf
index 360c5f3c71..22d4f55398 100644
--- a/community/modules/internal/slurm-gcp/instance_template/variables.tf
+++ b/community/modules/internal/slurm-gcp/instance_template/variables.tf
@@ -319,6 +319,12 @@ variable "disk_auto_delete" {
   default     = true
 }

+variable "controller_save_disk_self_link" {
+  description = "Name of the disk to attach to the controller instance for saving Slurm state"
+  type        = string
+  default     = null
+}
+
 variable "additional_disks" {
   type = list(object({
     disk_name    = string
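Despite the `self_link` suffix, the value that flows through this variable downstream is a disk *name*, which is what `google_compute_instance_template`'s `disk.source` expects. A minimal sketch of a caller, assuming illustrative resource names and zone that are not part of this change:

```hcl
# Hypothetical caller: create a state disk and hand its name to the template.
resource "google_compute_disk" "state" {
  name = "example-controller-state" # illustrative
  type = "pd-ssd"
  size = 50
  zone = "us-central1-a" # illustrative
}

module "controller_template" {
  source = "./community/modules/internal/slurm-gcp/instance_template"
  # ... all other required inputs for the template module ...

  controller_save_disk_self_link = google_compute_disk.state.name
}
```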
diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/README.md b/community/modules/internal/slurm-gcp/internal_instance_template/README.md
index 1d07b23e7c..83e29b72fb 100644
--- a/community/modules/internal/slurm-gcp/internal_instance_template/README.md
+++ b/community/modules/internal/slurm-gcp/internal_instance_template/README.md
@@ -37,6 +37,7 @@ No modules.
| [auto\_delete](#input\_auto\_delete) | Whether or not the boot disk should be auto-deleted | `string` | `"true"` | no |
| [automatic\_restart](#input\_automatic\_restart) | (Optional) Specifies whether the instance should be automatically restarted if it is terminated by Compute Engine (not terminated by a user). | `bool` | `true` | no |
| [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example | `string` | `"false"` | no |
+| [controller\_save\_disk\_self\_link](#input\_controller\_save\_disk\_self\_link) | Name of the disk to attach to the controller instance for saving Slurm state | `string` | `null` | no |
| [disk\_encryption\_key](#input\_disk\_encryption\_key) | The id of the encryption key that is stored in Google Cloud KMS to use to encrypt all the disks on this instance | `string` | `null` | no |
| [disk\_labels](#input\_disk\_labels) | Labels to be assigned to boot disk, provided as a map | `map(string)` | `{}` | no |
| [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB | `string` | `"100"` | no |
diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/main.tf b/community/modules/internal/slurm-gcp/internal_instance_template/main.tf
index 8395998201..3b0692a954 100644
--- a/community/modules/internal/slurm-gcp/internal_instance_template/main.tf
+++ b/community/modules/internal/slurm-gcp/internal_instance_template/main.tf
@@ -204,4 +204,14 @@ resource "google_compute_instance_template" "tpl" {
       count = guest_accelerator.value.count
     }
   }
+
+  dynamic "disk" {
+    for_each = var.controller_save_disk_self_link != null ? ["unit"] : []
+    content {
+      source      = var.controller_save_disk_self_link
+      device_name = "controller-state-save"
+      auto_delete = false
+    }
+  }
+
 }
diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf b/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf
index 26cb523d84..fd352d870e 100644
--- a/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf
+++ b/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf
@@ -107,6 +107,7 @@ variable "advanced_machine_features" {
   })
 }

+
 #######
 # disk
 #######
@@ -158,6 +159,12 @@ variable "auto_delete" {
   default     = "true"
 }

+variable "controller_save_disk_self_link" {
+  description = "Name of the disk to attach to the controller instance for saving Slurm state"
+  type        = string
+  default     = null
+}
+
 variable "additional_disks" {
   description = "List of maps of additional disks. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#disk_name"
   type = list(object({
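When a disk name is supplied, the `dynamic "disk"` block above renders exactly one extra disk on the template; conceptually it is equivalent to this static sketch:

```hcl
disk {
  # Name of an existing zonal disk (e.g. one created by google_compute_disk).
  source      = var.controller_save_disk_self_link
  # Surfaces on the VM as /dev/disk/by-id/google-controller-state-save,
  # which is the path the setup script mounts.
  device_name = "controller-state-save"
  # Never delete the state disk together with the instance.
  auto_delete = false
}
```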
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
index 341a7605f0..ecb142479f 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
@@ -264,6 +264,7 @@ limitations under the License.
| Name | Type |
|------|------|
+| [google_compute_disk.controller_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
| [google_compute_instance_from_template.controller](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_from_template) | resource |
| [google_secret_manager_secret.cloudsql](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/secret_manager_secret) | resource |
| [google_secret_manager_secret_iam_member.cloudsql_secret_accessor](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/secret_manager_secret_iam_member) | resource |
@@ -291,6 +292,7 @@ limitations under the License.
| [compute\_startup\_scripts\_timeout](#input\_compute\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in compute\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no |
| [controller\_startup\_script](#input\_controller\_startup\_script) | Startup script used by the controller VM. | `string` | `"# no-op"` | no |
| [controller\_startup\_scripts\_timeout](#input\_controller\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in controller\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no |
+| [controller\_state\_disk](#input\_controller\_state\_disk) | A disk that will be attached to the controller instance to save the state of Slurm. The disk is created and used by default.
To disable this feature, set this variable to null.

NOTE: This will not save the contents of /opt/apps and /home. To preserve those, they must be saved externally. |
object({
type = string
size = number
})
|
{
"size": 50,
"type": "pd-ssd"
}
| no |
| [create\_bucket](#input\_create\_bucket) | Create GCS bucket instead of using an existing one. | `bool` | `true` | no |
| [deployment\_name](#input\_deployment\_name) | Name of the deployment. | `string` | n/a | yes |
| [disable\_controller\_public\_ips](#input\_disable\_controller\_public\_ips) | DEPRECATED: Use `enable_controller_public_ips` instead. | `bool` | `null` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf
index 6d4d1b0b07..66ef254184 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf
@@ -48,6 +48,15 @@ locals {
   )
 }

+resource "google_compute_disk" "controller_disk" {
+  count = var.controller_state_disk != null ? 1 : 0
+
+  name = "controller-state-save"
+  type = var.controller_state_disk.type
+  size = var.controller_state_disk.size
+  zone = var.zone
+}
+
 # INSTANCE TEMPLATE
 module "slurm_controller_template" {
   source = "../../internal/slurm-gcp/instance_template"
@@ -64,10 +73,11 @@ module "slurm_controller_template" {
   disk_type        = var.disk_type
   additional_disks = local.additional_disks

-  bandwidth_tier            = var.bandwidth_tier
-  slurm_bucket_path         = module.slurm_files.slurm_bucket_path
-  can_ip_forward            = var.can_ip_forward
-  advanced_machine_features = var.advanced_machine_features
+  controller_save_disk_self_link = var.controller_state_disk != null ? google_compute_disk.controller_disk[0].name : null
+  bandwidth_tier                 = var.bandwidth_tier
+  slurm_bucket_path              = module.slurm_files.slurm_bucket_path
+  can_ip_forward                 = var.can_ip_forward
+  advanced_machine_features      = var.advanced_machine_features

   enable_confidential_vm = var.enable_confidential_vm
   enable_oslogin         = var.enable_oslogin
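A hedged usage sketch for the new controller input (module source path abbreviated; all other required inputs omitted):

```hcl
module "slurm_controller" {
  source = "./community/modules/scheduler/schedmd-slurm-gcp-v6-controller"
  # ... required inputs (deployment_name, project_id, zone, ...) ...

  # Default behavior: a 50 GB pd-ssd state disk is created and attached.
  controller_state_disk = {
    type = "pd-ssd"
    size = 50
  }

  # To opt out of the state disk entirely:
  # controller_state_disk = null
}
```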
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
index 8b60cbfc45..58c8bd3aca 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
@@ -72,6 +72,7 @@ No modules.
| [compute\_startup\_scripts\_timeout](#input\_compute\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in compute\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no |
| [controller\_startup\_scripts](#input\_controller\_startup\_scripts) | List of scripts to be run on controller VM startup. |
list(object({
filename = string
content = string
}))
| `[]` | no |
| [controller\_startup\_scripts\_timeout](#input\_controller\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in controller\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no |
+| [controller\_state\_disk](#input\_controller\_state\_disk) | A disk that will be attached to the controller instance to save the state of Slurm. The disk is created and used by default.
To disable this feature, set this variable to null.

NOTE: This will not save the contents of /opt/apps and /home. To preserve those, they must be saved externally. |
object({
type = string
size = number
})
|
{
"size": 50,
"type": "pd-ssd"
}
| no |
| [disable\_default\_mounts](#input\_disable\_default\_mounts) | Disable default global network storage from the controller
- /usr/local/etc/slurm
- /etc/munge
- /home
- /apps
If these are disabled, the slurm etc and munge dirs must be added manually,
or some other mechanism must be used to synchronize the slurm conf files
and the munge key across the cluster. | `bool` | `false` | no |
| [enable\_bigquery\_load](#input\_enable\_bigquery\_load) | Enables loading of cluster job usage into BigQuery.

NOTE: Requires Google BigQuery API. | `bool` | `false` | no |
| [enable\_debug\_logging](#input\_enable\_debug\_logging) | Enables debug logging mode. Not for production use. | `bool` | `false` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
index 7783258299..7b9fe27bc1 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
@@ -43,14 +43,15 @@ locals {
   tp = "${local.bucket_dir}/" # prefix to trim from the bucket path to get a "file name"

   config = {
-    enable_bigquery_load = var.enable_bigquery_load
-    cloudsql_secret      = var.cloudsql_secret
-    cluster_id           = random_uuid.cluster_id.result
-    project              = var.project_id
-    slurm_cluster_name   = var.slurm_cluster_name
-    bucket_path          = local.bucket_path
-    enable_debug_logging = var.enable_debug_logging
-    extra_logging_flags  = var.extra_logging_flags
+    enable_bigquery_load  = var.enable_bigquery_load
+    cloudsql_secret       = var.cloudsql_secret
+    cluster_id            = random_uuid.cluster_id.result
+    project               = var.project_id
+    slurm_cluster_name    = var.slurm_cluster_name
+    bucket_path           = local.bucket_path
+    enable_debug_logging  = var.enable_debug_logging
+    extra_logging_flags   = var.extra_logging_flags
+    controller_state_disk = var.controller_state_disk

     # storage
     disable_default_mounts = var.disable_default_mounts
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/setup.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/setup.py
index 0637fa569b..0caa3ea58f 100755
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/setup.py
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/setup.py
@@ -176,6 +176,49 @@ def run_custom_scripts():
             log.exception(f"script {script} encountered an exception")
             raise e

+def mount_save_state_disk():
+    # GCE exposes attached disks as /dev/disk/by-id/google-<device_name>;
+    # "controller-state-save" is the device_name set on the attached disk.
+    disk_name = "/dev/disk/by-id/google-controller-state-save"
+    mount_point = "/var/spool/slurm"
+    fs_type = "xfs"
+
+    # Create a filesystem only if the device does not already carry one,
+    # so Slurm state survives controller re-creation.
+    rdevice = util.run(f"realpath {disk_name}").stdout.strip()
+    file_output = util.run(f"file -s {rdevice}").stdout.strip()
+    if "filesystem" not in file_output:
+        util.run(f"mkfs -t {fs_type} -q {rdevice}")
+
+    # Persist the mount across reboots without duplicating the fstab entry.
+    fstab_entry = f"{disk_name}\t{mount_point}\t{fs_type}\tdefaults\t0 0\n"
+    with open("/etc/fstab", "r") as f:
+        fstab = f.readlines()
+    if fstab_entry not in fstab:
+        with open("/etc/fstab", "a") as f:
+            f.write(fstab_entry)
+        util.run("systemctl daemon-reload")
+
+    os.makedirs(mount_point, exist_ok=True)
+    util.run(f"mount {mount_point}")
+
+    current_user = util.run(f"stat -c %U {mount_point}").stdout.strip()
+    if current_user != "slurm":
+        util.run(f"chown -R slurm:slurm {mount_point}")
+
+def mount_munge_key_disk():
+    # Bind-mount the munge directory onto the state disk so the munge key
+    # is also preserved across controller re-creation.
+    state_disk_dir = "/var/spool/slurm/munge"
+    mount_point = "/etc/munge"
+
+    os.makedirs(state_disk_dir, exist_ok=True)
+
+    util.run(f"mount --bind {state_disk_dir} {mount_point}")
+
+    fstab_entry = f"{state_disk_dir} {mount_point} none bind 0 0\n"
+    with open("/etc/fstab", "r") as f:
+        fstab = f.readlines()
+
+    if fstab_entry not in fstab:
+        with open("/etc/fstab", "a") as f:
+            f.write(fstab_entry)
+        util.run("systemctl daemon-reload")
+

 def setup_jwt_key():
     jwt_key = Path(slurmdirs.state / "jwt_hs256.key")
@@ -329,6 +372,11 @@ def setup_controller():
     util.chown_slurm(dirs.scripts / "config.yaml", mode=0o600)
     install_custom_scripts()
     conf.gen_controller_configs(lookup())
+
+    if lookup().cfg.controller_state_disk is not None:
+        mount_save_state_disk()
+        mount_munge_key_disk()
+
     setup_jwt_key()
     setup_munge_key()
     setup_sudoers()
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
index 31423c2211..cda446c624 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
@@ -58,6 +58,24 @@ variable "slurm_cluster_name" {
   }
 }

+variable "controller_state_disk" {
+  description = <<EOD
+A disk that will be attached to the controller instance to save the state of Slurm. The disk is created and used by default.
+To disable this feature, set this variable to null.
+
+NOTE: This will not save the contents of /opt/apps and /home. To preserve those, they must be saved externally.
+EOD
+  type = object({
+    type = string
+    size = number
+  })
+
+  default = {
+    type = "pd-ssd"
+    size = 50
+  }
+}
+
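For completeness, overriding the default from a tfvars file looks like this (values are illustrative; any valid disk type and size work):

```hcl
# terraform.tfvars (illustrative values)
controller_state_disk = {
  type = "pd-balanced"
  size = 100
}
```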