Skip to content

Commit 3fc6a52

Browse files
nupurg-ibmNupur Goyal
and
Nupur Goyal
authored
feat: Scale storage support on IBM Cloud HPC "/mnt/lsf"<br>* Fix for Intermittent scale mount failures<br>* Custom VPC Share support on "/mnt/lsf"<br>* Test cases upgradation<br>* Tekton COS Integrate<br>* New custom images for PAC (#171)
Co-authored-by: Nupur Goyal <[email protected]>
1 parent b691033 commit 3fc6a52

File tree

8 files changed

+77
-14
lines changed

8 files changed

+77
-14
lines changed

modules/landing_zone_vsi/image_map.tf

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
locals {
22
image_region_map = {
3-
"hpcaas-lsf10-rhel88-v7" = {
4-
"us-east" = "r014-68a7ad8a-c513-418e-a30c-9a04ce0a144a"
5-
"eu-de" = "r010-b392ff76-fb8c-4b0f-9fef-fba89eb3ee5b"
6-
"us-south" = "r006-86f207dd-7029-4705-9222-0f5499387734"
3+
"hpcaas-lsf10-rhel88-v8" = {
4+
"us-east" = "r014-ee8b808f-e129-4d9e-965e-fed7003132e7"
5+
"eu-de" = "r010-bfad7737-77f9-4af7-9446-4783fb582258"
6+
"us-south" = "r006-d314bc1d-e904-4124-9055-0862e1a56579"
77
},
88
"hpcaas-lsf10-rhel88-compute-v5" = {
99
"us-east" = "r014-deb34fb1-edbf-464c-9af3-7efa2efcff3f"

samples/configs/hpc_catalog_values.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"enable_cos_integration" : "false",
2121
"cos_instance_name" : "__NULL__",
2222
"enable_fip" : "true",
23-
"management_image_name" : "hpcaas-lsf10-rhel88-v7",
23+
"management_image_name" : "hpcaas-lsf10-rhel88-v8",
2424
"compute_image_name" : "hpcaas-lsf10-rhel88-compute-v5",
2525
"login_image_name" : "hpcaas-lsf10-rhel88-compute-v5",
2626
"login_node_instance_type" : "bx2-2x8",

samples/configs/hpc_schematics_values.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@
197197
},
198198
{
199199
"name": "management_image_name",
200-
"value": "hpcaas-lsf10-rhel88-v7",
200+
"value": "hpcaas-lsf10-rhel88-v8",
201201
"type": "string",
202202
"secure": false,
203203
"description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering."

solutions/hpc/variables.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ variable "login_node_instance_type" {
176176
}
177177
variable "management_image_name" {
178178
type = string
179-
default = "hpcaas-lsf10-rhel88-v7"
179+
default = "hpcaas-lsf10-rhel88-v8"
180180
description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering."
181181

182182
}

tests/lsf/lsf_cluster_utils.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ func GetOSNameOfNode(t *testing.T, sClient *ssh.Client, hostIP string, logger *u
911911
return "", parseErr
912912
}
913913

914-
// HPCCheckFileMount checks if essential LSF directories (conf, config_done, das_staging_area, gui-conf, gui-logs, log, repository-path and work) exist
914+
// HPCCheckFileMount checks if essential LSF directories (10.1, conf, config_done, das_staging_area, data, gui-conf, gui-logs, log, repository-path and work) exist
915915
// on remote machines identified by the provided list of IP addresses. It utilizes SSH to
916916
// query and validate the directories. Any missing directory triggers an error, and the
917917
// function logs the success message if all directories are found.
@@ -1022,15 +1022,15 @@ func verifyDirectories(t *testing.T, sClient *ssh.Client, ip string, logger *uti
10221022
// Split the output into directory names
10231023
actualDirs := strings.Fields(strings.TrimSpace(string(outputTwo)))
10241024
// Define expected directories
1025-
expectedDirs := []string{"conf", "config_done", "das_staging_area", "gui-conf", "gui-logs", "log", "repository-path", "work"}
1025+
expectedDirs := []string{"10.1", "conf", "config_done", "das_staging_area", "data", "gui-conf", "gui-logs", "log", "repository-path", "work"}
10261026

10271027
// Verify if all expected directories exist
10281028
if !utils.VerifyDataContains(t, actualDirs, expectedDirs, logger) {
10291029
return fmt.Errorf("actual directory '%v' does not match the expected directory '%v' for node IP '%s'", actualDirs, expectedDirs, ip)
10301030
}
10311031

10321032
// Log directories existence
1033-
logger.Info(t, fmt.Sprintf("Directories [conf, config_done, das_staging_area, gui-conf, gui-logs, log, repository-path and work] exist on %s", ip))
1033+
logger.Info(t, fmt.Sprintf("Directories [10.1, conf, config_done, das_staging_area, data, gui-conf, gui-logs, log, repository-path and work] exist on %s", ip))
10341034
return nil
10351035
}
10361036

@@ -1321,7 +1321,7 @@ func LSFRunJobsAsLDAPUser(t *testing.T, sClient *ssh.Client, jobCmd, ldapUser st
13211321
return fmt.Errorf("job execution for ID %s exceeded the specified time", jobID)
13221322
}
13231323

1324-
// HPCCheckFileMountAsLDAPUser checks if essential LSF directories (conf, config_done, das_staging_area, gui-conf, gui-logs, log, repository-path and work) exist
1324+
// HPCCheckFileMountAsLDAPUser checks if essential LSF directories (10.1, conf, config_done, das_staging_area, data, gui-conf, gui-logs, log, repository-path and work) exist
13251325
// on remote machines. It utilizes SSH to
13261326
// query and validate the directories. Any missing directory triggers an error, and the
13271327
// function logs the success message if all directories are found.
@@ -1408,15 +1408,15 @@ func verifyDirectoriesAsLdapUser(t *testing.T, sClient *ssh.Client, hostname str
14081408
// Split the output into directory names
14091409
actualDirs := strings.Fields(strings.TrimSpace(string(outputTwo)))
14101410
// Define expected directories
1411-
expectedDirs := []string{"conf", "config_done", "das_staging_area", "gui-conf", "gui-logs", "log", "repository-path", "work"}
1411+
expectedDirs := []string{"10.1", "conf", "config_done", "das_staging_area", "data", "gui-conf", "gui-logs", "log", "repository-path", "work"}
14121412

14131413
// Verify if all expected directories exist
14141414
if !utils.VerifyDataContains(t, actualDirs, expectedDirs, logger) {
14151415
return fmt.Errorf("actual directory '%v' does not match the expected directory '%v' for node IP '%s'", actualDirs, expectedDirs, hostname)
14161416
}
14171417

14181418
// Log directories existence
1419-
logger.Info(t, fmt.Sprintf("Directories [conf, config_done, das_staging_area, gui-conf, gui-logs, log, repository-path and work] exist on %s", hostname))
1419+
logger.Info(t, fmt.Sprintf("Directories [10.1, conf, config_done, das_staging_area, data, gui-conf, gui-logs, log, repository-path and work] exist on %s", hostname))
14201420
return nil
14211421
}
14221422

tests/other_test.go

+62
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,7 @@ func TestRunInvalidLDAPServerIP(t *testing.T) {
899899
if err != nil {
900900
// Check if the error message contains specific keywords indicating LDAP server IP issues
901901
result := utils.VerifyDataContains(t, err.Error(), "The connection to the existing LDAP server 10.10.10.10 failed", testLogger)
902+
assert.True(t, result)
902903
if result {
903904
testLogger.PASS(t, "Validation succeeded: Invalid LDAP server IP")
904905
} else {
@@ -984,6 +985,7 @@ func TestRunInvalidLDAPUsernamePassword(t *testing.T) {
984985
userPasswordError := utils.VerifyDataContains(t, err.Error(), "ldap_usr_pwd", testLogger)
985986
adminPasswordError := utils.VerifyDataContains(t, err.Error(), "ldap_adm_pwd", testLogger)
986987
result := usernameError && userPasswordError && adminPasswordError
988+
987989
// Assert that the result is true if all mandatory fields are missing
988990
assert.True(t, result)
989991
if result {
@@ -1115,6 +1117,7 @@ func TestRunInvalidDomainName(t *testing.T) {
11151117
if err != nil {
11161118
// Check if the error message contains specific keywords indicating domain name issues
11171119
result := utils.VerifyDataContains(t, err.Error(), "The domain name provided for compute is not a fully qualified domain name", testLogger)
1120+
assert.True(t, result)
11181121
if result {
11191122
testLogger.PASS(t, "Validation succeeded: Invalid domain name")
11201123
} else {
@@ -1339,3 +1342,62 @@ func TestRunExistSubnetIDVpcNameAsNull(t *testing.T) {
13391342
testLogger.FAIL(t, "Expected error did not occur on Without VPC name and with valid cluster_subnet_ids and login_subnet_id")
13401343
}
13411344
}
1345+
1346+
// TestRunInvalidSshKeysAndRemoteAllowedIP validates cluster creation with invalid ssh keys and remote allowed IP.
1347+
func TestRunInvalidSshKeysAndRemoteAllowedIP(t *testing.T) {
1348+
// Parallelize the test to run concurrently with others
1349+
t.Parallel()
1350+
1351+
// Setup test suite
1352+
setupTestSuite(t)
1353+
1354+
testLogger.Info(t, "Cluster creation process initiated for "+t.Name())
1355+
1356+
// HPC cluster prefix
1357+
hpcClusterPrefix := utils.GenerateTimestampedClusterPrefix(utils.GenerateRandomString())
1358+
1359+
// Retrieve cluster information from environment variables
1360+
envVars := GetEnvVars()
1361+
1362+
// Get the absolute path of solutions/hpc
1363+
abs, err := filepath.Abs("solutions/hpc")
1364+
require.NoError(t, err, "Unable to get absolute path")
1365+
1366+
terrPath := strings.ReplaceAll(abs, "tests/", "")
1367+
1368+
// Define Terraform options
1369+
terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{
1370+
TerraformDir: terrPath,
1371+
Vars: map[string]interface{}{
1372+
"cluster_prefix": hpcClusterPrefix,
1373+
"bastion_ssh_keys": []string{""},
1374+
"compute_ssh_keys": []string{""},
1375+
"zones": utils.SplitAndTrim(envVars.Zone, ","),
1376+
"remote_allowed_ips": []string{""},
1377+
"cluster_id": envVars.ClusterID,
1378+
"reservation_id": envVars.ReservationID,
1379+
},
1380+
})
1381+
1382+
// Initialize and plan the Terraform configuration (no apply is performed)
1383+
_, err = terraform.InitAndPlanE(t, terraformOptions)
1384+
1385+
// Check that an error occurred during init/plan
1386+
assert.Error(t, err, "Expected an error during apply")
1387+
1388+
if err != nil {
1389+
// Check if the error message contains the keywords indicating an invalid remote allowed IP and a missing SSH key
1390+
result := utils.VerifyDataContains(t, err.Error(), "The provided IP address format is not valid", testLogger) &&
1391+
utils.VerifyDataContains(t, err.Error(), "No SSH Key found with name", testLogger)
1392+
assert.True(t, result)
1393+
if result {
1394+
testLogger.PASS(t, "Validation succeeded: Invalid ssh keys and remote allowed IP")
1395+
} else {
1396+
testLogger.FAIL(t, "Validation failed: Invalid ssh keys and remote allowed IP")
1397+
}
1398+
} else {
1399+
// Log an error if the expected error did not occur
1400+
t.Error("Expected error did not occur")
1401+
testLogger.FAIL(t, "Expected error did not occur on Invalid ssh keys and remote allowed IP")
1402+
}
1403+
}

tests/pr_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ var ignoreDestroys = []string{
3535
"module.check_cluster_status.null_resource.remote_exec[0]",
3636
"module.landing_zone_vsi.module.hpc.module.landing_zone_vsi.module.wait_management_candidate_vsi_booted.null_resource.remote_exec[0]",
3737
"module.landing_zone_vsi.module.hpc.module.landing_zone_vsi.module.wait_management_vsi_booted.null_resource.remote_exec[0]",
38+
"module.landing_zone_vsi.module.do_management_vsi_configuration.null_resource.remote_exec_script_cp_files[1]",
3839
}
3940

4041
// EnvVars stores environment variable values.

tests/test_config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ remote_allowed_ips:
77
ssh_key: geretain-hpc
88
login_node_instance_type: bx2-2x8
99
login_image_name: hpcaas-lsf10-rhel88-compute-v5
10-
management_image_name: hpcaas-lsf10-rhel88-v7
10+
management_image_name: hpcaas-lsf10-rhel88-v8
1111
compute_image_name: hpcaas-lsf10-rhel88-compute-v5
1212
management_node_instance_type: bx2-2x8
1313
management_node_count: 2

0 commit comments

Comments
 (0)