From 8b4536b41d18585dd5ae96e389b285d25d4dc2a6 Mon Sep 17 00:00:00 2001 From: Stephan <31624652+stephandooper@users.noreply.github.com> Date: Wed, 9 Aug 2023 10:10:55 +0200 Subject: [PATCH] Update slurm.conf Uncommented AccountingStorageEnforce. Added UnkillableStepTimeout to accommodate older water-cooled machines that take a longer time to start/kill jobs. Adjusted KillWait from 30 to 90 for the same reason. --- roles/slurm/templates/etc/slurm/slurm.conf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/roles/slurm/templates/etc/slurm/slurm.conf b/roles/slurm/templates/etc/slurm/slurm.conf index 70eb837ac..3f4de7f2a 100644 --- a/roles/slurm/templates/etc/slurm/slurm.conf +++ b/roles/slurm/templates/etc/slurm/slurm.conf @@ -88,8 +88,9 @@ SlurmctldTimeout=120 SlurmdTimeout=300 InactiveLimit=0 MinJobAge=300 -KillWait=30 +KillWait=90 Waittime=0 +UnkillableStepTimeout=180 # SCHEDULING SchedulerType=sched/backfill @@ -123,7 +124,7 @@ AccountingStorageTRES=gres/gpu AccountingStorageType=accounting_storage/slurmdbd AccountingStorageHost={{ groups["slurm-master"][0] }} #AccountingStorageLoc= -#AccountingStorageEnforce=associations,limits,qos +AccountingStorageEnforce=associations,limits,qos AccountingStorageUser={{ slurm_db_username }} AccountingStoragePass=/var/run/munge/munge.socket.2