## slurm_test.conf: main configuration file for SLURM, used
## when running slurm's own test suite
## $Id$

###
### Cluster
###
ClusterName=abel
#default: AuthType=auth/munge
#default: CryptoType=crypto/munge
SlurmctldPort=6817
SlurmdPort=6818
TmpFs=/work ## FIXME: /scratch?
#default: TreeWidth=50 FIXME: try ceil(sqrt(#nodes))
TreeWidth=26

## Timers:
#default: MessageTimeout=10
## FIXME: should be reduced when/if we see that slurmd is behaving:
#SlurmdTimeout=36000
WaitTime=0

###
### Slurmctld
###
ControlMachine=nielshenrik
#default: MinJobAge=300
SlurmUser=slurm
StateSaveLocation=/state/partition1/slurm/slurmstate

###
### Nodes
###
FastSchedule=2
HealthCheckInterval=300
#FIXME: Removed for testing: HealthCheckProgram=/hpc/sbin/healthcheck
ReturnToService=1
GresTypes=gpu,localtmp
## The default is 61.5 GB (62976 MB) allocatable RAM.
Nodename=DEFAULT Sockets=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=62976 Gres=localtmp:100 State=unknown
PartitionName=DEFAULT State=up Shared=NO
Include /etc/slurm/slurmnodes.conf
TopologyPlugin=topology/tree

###
### Jobs
###
#FIXME: Removed for testing: PropagateResourceLimits=STACK
#FIXME: Removed for testing: DefMemPerCPU=1000
EnforcePartLimits=yes
#default: InactiveLimit=0
JobFileAppend=1
#default: JobRequeue=1
#FIXME: Removed for testing: JobSubmitPlugins=lua
#default: MaxJobCount=10000
#default: MpiDefault=none #FIXME: openmpi?
#default: OverTimeLimit=0
#default: VSizeFactor=0
TaskPlugin=task/cgroup

## Prologs/Epilogs
# run by slurmctld as SlurmUser on ControlMachine before granting a job allocation:
#PrologSlurmctld=
# run by slurmd on each node prior to the first job step on the node:
Prolog=/hpc/sbin/prolog_slurmd
# run by srun on the node running srun, prior to the launch of a job step:
#SrunProlog=
# run as user for each task prior to initiate the task:
TaskProlog=/hpc/sbin/prolog_task
# run as user for each task after the task finishes:
#TaskEpilog=
# run by srun on the node running srun, after a job step finishes:
#SrunEpilog=
# run as root on each node when job has completed:
Epilog=/hpc/sbin/epilog_slurmd
# run as SlurmUser on ControlMachine after the allocation is released:
#EpilogSlurmctld=

###
### Job Priority
###
PriorityType=priority/multifactor
#default: PriorityCalcPeriod=5
#default: PriorityDecayHalfLife=7-0 #(7 days)
#default: PriorityUsageResetPeriod=NONE
#default: PriorityMaxAge=7-0 #(7 days)
#default: PriorityFavorSmall=no
PriorityWeightAge=10000
PriorityWeightFairshare=10000
PriorityWeightJobSize=1000
#default: PriorityWeightPartition=0
PriorityWeightQOS=10000

###
### Scheduling
###
SchedulerType=sched/backfill
#default: SchedulerParameters=default_queue_depth=100,defer=?,bf_interval=30,bf_window=1440,max_job_bf=50,bf_max_job_user=0
SchedulerParameters=bf_max_job_user=10
SelectType=select/cons_res
SelectTypeParameters=CR_CPU_Memory # FIXME: perhaps Core!
PreemptMode=requeue
PreemptType=preempt/qos
CompleteWait=32 # KillWait + 2
#default: KillWait=30

###
### Checkpointing
###
# ************** WARNING ***********************
# *** ENABLING/DISABLING THIS KILLS ALL JOBS ***
# **********************************************
## Checkpointing currently not implemented on abel

###
### Logging
###
SlurmctldDebug=5
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmSchedLogLevel=1
SlurmSchedLogFile=/var/log/slurm/sched.log
SlurmdDebug=5
SlurmdLogFile=/var/log/slurm/slurmd.log
#default: DebugFlags=
DebugFlags=Backfill

###
### Accounting (Slurmdbd)
###
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=nielshenrik
JobAcctGatherType=jobacct_gather/linux
#default: JobAcctGatherFrequency=30
ProctrackType=proctrack/cgroup
#FIXME: Removed for testing: AccountingStorageEnforce=limits,qos # combination of associations < limits < wckeys, qos