Skip to content

Start testing ClusterManagers.jl #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
language: julia
os:
- osx
- linux
julia:
- 1.0
- 1.1
- 1.2
- 1.3
- nightly
notifications:
email: false
sudo: required

matrix:
include:
- julia: 1.0
env:
- JOBQUEUE=slurm

services: docker
before_install:
- set -e
- source ci/${JOBQUEUE}.sh
- jobqueue_before_install
- set +e
install:
- set -e
- jobqueue_install
- set +e
script:
- set -e
- jobqueue_script
- set +e
after_script:
- jobqueue_after_script

#script: # the default script is equivalent to the following
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("Example"); Pkg.test("Example"; coverage=true)';
Expand Down
28 changes: 28 additions & 0 deletions ci/slurm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash

function jobqueue_before_install {
docker version
docker-compose version

# start slurm cluster
pushd ./ci/slurm
./start-slurm.sh
popd

docker ps -a
docker images
}

function jobqueue_install {
docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project -e 'using Pkg; Pkg.build();'"
}

function jobqueue_script {
docker exec -it slurmctld /bin/bash -c "cd /workspace; julia --project test/runtests.jl slurm"
}

function jobqueue_after_script {
docker exec -it slurmctld bash -c 'sinfo'
docker exec -it slurmctld bash -c 'squeue'
docker exec -it slurmctld bash -c 'sacct -l'
}
4 changes: 4 additions & 0 deletions ci/slurm/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FROM giovtorres/slurm-docker-cluster
ENV LC_ALL en_US.UTF-8

COPY slurm.conf /etc/slurm/slurm.conf
81 changes: 81 additions & 0 deletions ci/slurm/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
version: "2.2"

services:
mysql:
image: mysql:5.7
hostname: mysql
container_name: mysql
environment:
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
MYSQL_DATABASE: slurm_acct_db
MYSQL_USER: slurm
MYSQL_PASSWORD: password
volumes:
- var_lib_mysql:/var/lib/mysql

slurmdbd:
build: .
command: ["slurmdbd"]
container_name: slurmdbd
hostname: slurmdbd
volumes:
- etc_munge:/etc/munge
- etc_slurm:/etc/slurm
- var_log_slurm:/var/log/slurm
expose:
- "6819"
depends_on:
- mysql

slurmctld:
build: .
command: ["slurmctld"]
container_name: slurmctld
hostname: slurmctld
volumes:
- etc_munge:/etc/munge
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
- ../..:/workspace
expose:
- "6817"
depends_on:
- "slurmdbd"

c1:
build: .
command: ["slurmd"]
hostname: c1
container_name: c1
volumes:
- etc_munge:/etc/munge
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
expose:
- "6818"
depends_on:
- "slurmctld"

c2:
build: .
command: ["slurmd"]
hostname: c2
container_name: c2
volumes:
- etc_munge:/etc/munge
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
expose:
- "6818"
depends_on:
- "slurmctld"

volumes:
etc_munge:
etc_slurm:
slurm_jobdir:
var_lib_mysql:
var_log_slurm:
5 changes: 5 additions & 0 deletions ci/slurm/register_cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
set -e

docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \
docker-compose restart slurmdbd slurmctld
94 changes: 94 additions & 0 deletions ci/slurm/slurm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# slurm.conf
#
# See the slurm.conf man page for more information.
#
ClusterName=linux
ControlMachine=slurmctld
ControlAddr=slurmctld
#BackupController=
#BackupAddr=
#
SlurmUser=slurm
#SlurmdUser=root
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
StateSaveLocation=/var/lib/slurmd
SlurmdSpoolDir=/var/spool/slurmd
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
SlurmdPidFile=/var/run/slurmd/slurmd.pid
ProctrackType=proctrack/linuxproc
#PluginDir=
CacheGroups=0
#FirstJobId=
ReturnToService=0
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
#TaskPlugin=
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
#
# SCHEDULING
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
SelectType=select/cons_res
SelectTypeParameters=CR_CPU_Memory
FastSchedule=1
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=3
SlurmdLogFile=/var/log/slurm/slurmd.log
JobCompType=jobcomp/filetxt
JobCompLoc=/var/log/slurm/jobcomp.log
#
# ACCOUNTING
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
#
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=slurmdbd
AccountingStoragePort=6819
AccountingStorageLoc=slurm_acct_db
#AccountingStoragePass=
#AccountingStorageUser=
#
# COMPUTE NODES
NodeName=c[1-2] RealMemory=4096 CPUs=2 State=UNKNOWN
#
# PARTITIONS
PartitionName=normal Default=yes Nodes=c[1-2] Priority=50 DefMemPerCPU=2048 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP
9 changes: 9 additions & 0 deletions ci/slurm/start-slurm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

docker-compose up --build -d
while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ]
do
echo "Waiting for SLURM cluster to become ready";
sleep 2
done
echo "SLURM properly configured"