From f142ae88f733d43c54916c94508ef4c6db1cfcac Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 12:42:46 -0400 Subject: [PATCH 1/7] import CI files from dask-jobqueue --- ci/slurm.sh | 28 +++++++++++ ci/slurm/Dockerfile | 4 ++ ci/slurm/docker-compose.yml | 81 +++++++++++++++++++++++++++++++ ci/slurm/register_cluster.sh | 5 ++ ci/slurm/slurm.conf | 94 ++++++++++++++++++++++++++++++++++++ ci/slurm/start-slurm.sh | 9 ++++ 6 files changed, 221 insertions(+) create mode 100644 ci/slurm.sh create mode 100644 ci/slurm/Dockerfile create mode 100644 ci/slurm/docker-compose.yml create mode 100644 ci/slurm/register_cluster.sh create mode 100644 ci/slurm/slurm.conf create mode 100644 ci/slurm/start-slurm.sh diff --git a/ci/slurm.sh b/ci/slurm.sh new file mode 100644 index 0000000..985bcc3 --- /dev/null +++ b/ci/slurm.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +function jobqueue_before_install { + docker version + docker-compose version + + # start slurm cluster + pushd./ci/slurm + ./start-slurm.sh + popd + + docker ps -a + docker images +} + +function jobqueue_install { + docker exec -it slurmctld /bin/bash -c "cd /dask-jobqueue; pip install -e ." +} + +function jobqueue_script { + docker exec -it slurmctld /bin/bash -c "cd /dask-jobqueue; pytest dask_jobqueue --verbose -E slurm" +} + +function jobqueue_after_script { + docker exec -it slurmctld bash -c 'sinfo' + docker exec -it slurmctld bash -c 'squeue' + docker exec -it slurmctld bash -c 'sacct -l' +} diff --git a/ci/slurm/Dockerfile b/ci/slurm/Dockerfile new file mode 100644 index 0000000..c2b6293 --- /dev/null +++ b/ci/slurm/Dockerfile @@ -0,0 +1,4 @@ +FROM giovtorres/slurm-docker-cluster +ENV LC_ALL en_US.UTF-8 + +COPY slurm.conf /etc/slurm/slurm.conf diff --git a/ci/slurm/docker-compose.yml b/ci/slurm/docker-compose.yml new file mode 100644 index 0000000..414ebfe --- /dev/null +++ b/ci/slurm/docker-compose.yml @@ -0,0 +1,81 @@ +version: "2.2" + +services: + mysql: + image: mysql:5.7 + hostname: mysql + container_name: mysql + environment: + MYSQL_RANDOM_ROOT_PASSWORD: "yes" + MYSQL_DATABASE: slurm_acct_db + MYSQL_USER: slurm + MYSQL_PASSWORD: password + volumes: + - var_lib_mysql:/var/lib/mysql + + slurmdbd: + build: . + command: ["slurmdbd"] + container_name: slurmdbd + hostname: slurmdbd + volumes: + - etc_munge:/etc/munge + - etc_slurm:/etc/slurm + - var_log_slurm:/var/log/slurm + expose: + - "6819" + depends_on: + - mysql + + slurmctld: + build: . + command: ["slurmctld"] + container_name: slurmctld + hostname: slurmctld + volumes: + - etc_munge:/etc/munge + - etc_slurm:/etc/slurm + - slurm_jobdir:/data + - var_log_slurm:/var/log/slurm + - ../..:/dask-jobqueue + expose: + - "6817" + depends_on: + - "slurmdbd" + + c1: + build: . + command: ["slurmd"] + hostname: c1 + container_name: c1 + volumes: + - etc_munge:/etc/munge + - etc_slurm:/etc/slurm + - slurm_jobdir:/data + - var_log_slurm:/var/log/slurm + expose: + - "6818" + depends_on: + - "slurmctld" + + c2: + build: . + command: ["slurmd"] + hostname: c2 + container_name: c2 + volumes: + - etc_munge:/etc/munge + - etc_slurm:/etc/slurm + - slurm_jobdir:/data + - var_log_slurm:/var/log/slurm + expose: + - "6818" + depends_on: + - "slurmctld" + +volumes: + etc_munge: + etc_slurm: + slurm_jobdir: + var_lib_mysql: + var_log_slurm: diff --git a/ci/slurm/register_cluster.sh b/ci/slurm/register_cluster.sh new file mode 100644 index 0000000..ef3d4d0 --- /dev/null +++ b/ci/slurm/register_cluster.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e + +docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \ +docker-compose restart slurmdbd slurmctld diff --git a/ci/slurm/slurm.conf b/ci/slurm/slurm.conf new file mode 100644 index 0000000..0aad9f1 --- /dev/null +++ b/ci/slurm/slurm.conf @@ -0,0 +1,94 @@ +# slurm.conf +# +# See the slurm.conf man page for more information. +# +ClusterName=linux +ControlMachine=slurmctld +ControlAddr=slurmctld +#BackupController= +#BackupAddr= +# +SlurmUser=slurm +#SlurmdUser=root +SlurmctldPort=6817 +SlurmdPort=6818 +AuthType=auth/munge +#JobCredentialPrivateKey= +#JobCredentialPublicCertificate= +StateSaveLocation=/var/lib/slurmd +SlurmdSpoolDir=/var/spool/slurmd +SwitchType=switch/none +MpiDefault=none +SlurmctldPidFile=/var/run/slurmd/slurmctld.pid +SlurmdPidFile=/var/run/slurmd/slurmd.pid +ProctrackType=proctrack/linuxproc +#PluginDir= +CacheGroups=0 +#FirstJobId= +ReturnToService=0 +#MaxJobCount= +#PlugStackConfig= +#PropagatePrioProcess= +#PropagateResourceLimits= +#PropagateResourceLimitsExcept= +#Prolog= +#Epilog= +#SrunProlog= +#SrunEpilog= +#TaskProlog= +#TaskEpilog= +#TaskPlugin= +#TrackWCKey=no +#TreeWidth=50 +#TmpFS= +#UsePAM= +# +# TIMERS +SlurmctldTimeout=300 +SlurmdTimeout=300 +InactiveLimit=0 +MinJobAge=300 +KillWait=30 +Waittime=0 +# +# SCHEDULING +SchedulerType=sched/backfill +#SchedulerAuth= +#SchedulerPort= +#SchedulerRootFilter= +SelectType=select/cons_res +SelectTypeParameters=CR_CPU_Memory +FastSchedule=1 +#PriorityType=priority/multifactor +#PriorityDecayHalfLife=14-0 +#PriorityUsageResetPeriod=14-0 +#PriorityWeightFairshare=100000 +#PriorityWeightAge=1000 +#PriorityWeightPartition=10000 +#PriorityWeightJobSize=1000 +#PriorityMaxAge=1-0 +# +# LOGGING +SlurmctldDebug=3 +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdDebug=3 +SlurmdLogFile=/var/log/slurm/slurmd.log +JobCompType=jobcomp/filetxt +JobCompLoc=/var/log/slurm/jobcomp.log +# +# ACCOUNTING +JobAcctGatherType=jobacct_gather/linux +JobAcctGatherFrequency=30 +# +AccountingStorageType=accounting_storage/slurmdbd +AccountingStorageHost=slurmdbd +AccountingStoragePort=6819 +AccountingStorageLoc=slurm_acct_db +#AccountingStoragePass= +#AccountingStorageUser= +# +# COMPUTE NODES +NodeName=c[1-2] RealMemory=4096 CPUs=2 State=UNKNOWN +# +# PARTITIONS +PartitionName=normal Default=yes Nodes=c[1-2] Priority=50 DefMemPerCPU=2048 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP diff --git a/ci/slurm/start-slurm.sh b/ci/slurm/start-slurm.sh new file mode 100644 index 0000000..5ba3447 --- /dev/null +++ b/ci/slurm/start-slurm.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +docker-compose up --build -d +while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ] + do + echo "Waiting for SLURM cluster to become ready"; + sleep 2 + done +echo "SLURM properly configured" From e10677f7c98c464c37e33772237fafc341e56b07 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 12:47:43 -0400 Subject: [PATCH 2/7] add travis CI for slurm --- .travis.yml | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 074c2ce..7ef27c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,31 @@ language: julia -os: - - osx - - linux -julia: - - 1.0 - - 1.1 - - 1.2 - - 1.3 - - nightly notifications: email: false +sudo: required + +matrix: + - include: + - julia: 1.0 + env: + - JOBQUEUE=slurm + +services: docker +before_install: + - set -e + - source ci/${JOBQUEUE}.sh + - jobqueue_before_install + - set +e +install: + - set -e + - jobqueue_install + - set +e +script: + - set -e + - jobqueue_script + - set +e +after_script: + - jobqueue_after_script + #script: # the default script is equivalent to the following # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi # - julia -e 'Pkg.clone(pwd()); Pkg.build("Example"); Pkg.test("Example"; coverage=true)'; From cce663c36beadd043ffd1c4bbbbc8f3e02b9f3ad Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 12:55:02 -0400 Subject: [PATCH 3/7] make it more julia esque --- ci/slurm.sh | 4 ++-- ci/slurm/docker-compose.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/slurm.sh b/ci/slurm.sh index 985bcc3..236b04a 100644 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -14,11 +14,11 @@ function jobqueue_before_install { } function jobqueue_install { - docker exec -it slurmctld /bin/bash -c "cd /dask-jobqueue; pip install -e ." + docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project -e 'using Pkg; Pkg.build();" } function jobqueue_script { - docker exec -it slurmctld /bin/bash -c "cd /dask-jobqueue; pytest dask_jobqueue --verbose -E slurm" + docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project test/runtests.jl slurm" } function jobqueue_after_script { diff --git a/ci/slurm/docker-compose.yml b/ci/slurm/docker-compose.yml index 414ebfe..bae6ba2 100644 --- a/ci/slurm/docker-compose.yml +++ b/ci/slurm/docker-compose.yml @@ -37,7 +37,7 @@ services: - etc_slurm:/etc/slurm - slurm_jobdir:/data - var_log_slurm:/var/log/slurm - - ../..:/dask-jobqueue + - ../..:/workspace expose: - "6817" depends_on: From d07cb77d33771447457679724729e19708ec7fbd Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 13:01:59 -0400 Subject: [PATCH 4/7] fix matrix --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7ef27c7..24f2553 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,10 +4,10 @@ notifications: sudo: required matrix: - - include: - - julia: 1.0 - env: - - JOBQUEUE=slurm + include: + - julia: 1.0 + env: + - JOBQUEUE=slurm services: docker before_install: From 239d7f6059a8b90e149940e696f0de85448dbc7d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 14:06:57 -0400 Subject: [PATCH 5/7] fix typo --- ci/slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/slurm.sh b/ci/slurm.sh index 236b04a..b3245ff 100644 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -5,7 +5,7 @@ function jobqueue_before_install { docker-compose version # start slurm cluster - pushd./ci/slurm + pushd ./ci/slurm ./start-slurm.sh popd From 2b5d62b7afdc335bb65fc1a575ad6515fd9fbab4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 14:09:47 -0400 Subject: [PATCH 6/7] +x --- ci/slurm.sh | 0 ci/slurm/register_cluster.sh | 0 ci/slurm/start-slurm.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/slurm.sh mode change 100644 => 100755 ci/slurm/register_cluster.sh mode change 100644 => 100755 ci/slurm/start-slurm.sh diff --git a/ci/slurm.sh b/ci/slurm.sh old mode 100644 new mode 100755 diff --git a/ci/slurm/register_cluster.sh b/ci/slurm/register_cluster.sh old mode 100644 new mode 100755 diff --git a/ci/slurm/start-slurm.sh b/ci/slurm/start-slurm.sh old mode 100644 new mode 100755 From b92935f13532702ffe98f82407a4e8d11cdd4ec7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 10 Sep 2019 14:39:47 -0400 Subject: [PATCH 7/7] missing ' --- ci/slurm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/slurm.sh b/ci/slurm.sh index b3245ff..bce67f8 100755 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -14,7 +14,7 @@ function jobqueue_before_install { } function jobqueue_install { - docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project -e 'using Pkg; Pkg.build();" + docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project -e 'using Pkg; Pkg.build();'" } function jobqueue_script {