Hi,

I made a short Python script to test whether Slurm was correctly limiting the
number of CPUs available to each partition. The script is as follows:
import multiprocessing as mp
import time as t

def fibonacci(n):
    # Return the Fibonacci numbers 1, 2, 3, 5, ... up to and including
    # the first one that is >= n.
    n = int(n)
    def fibon(a, b, n, result):
        c = a + b
        result.append(c)
        if c < n:
            fibon(b, c, n, result)
        return result
    return fibon(0, 1, n, [])

def calcnfib(n):
    # Only the last (largest) Fibonacci number is of interest.
    res = fibonacci(n)
    return res[-1]

def benchmark(pool):
    # Time a parallel map over roughly a million inputs and return
    # the elapsed wall-clock time in seconds.
    t0 = t.time()
    out = pool.map(calcnfib, range(1000000, 1000000000, 1000))
    tf = t.time()
    return str(tf - t0)

if __name__ == "__main__":
    # Repeat the benchmark with pools of increasing size.
    for nproc in (4, 32, 64, 128):
        with mp.Pool(nproc) as pool:
            print(str(nproc) + ": " + benchmark(pool))

It is submitted using the following batch script:
#!/bin/bash
#SBATCH --partition=full
#SBATCH --job-name="Large"
source testenv1/bin/activate
python3 multithread_example.py

The Slurm output file reads:
4: 5.660163640975952
32: 5.762076139450073
64: 5.8220226764678955
128: 5.85421347618103

However, if I run the same thing directly, outside of Slurm,
source testenv1/bin/activate
python3 multithread_example.py

I see faster and more expected behavior:
4: 1.5878620147705078
32: 0.34162330627441406
64: 0.24987316131591797
128: 0.2247719764709472

For reference, my slurm.conf is:
# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
#SlurmctldHost=localhost
ControlMachine=localhost

#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/cgroup
ReturnToService=1
SlurmctldPidFile=/home/slurm/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/home/slurm/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurm/slurmd/
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/home/slurm/spool/
SwitchType=switch/none
TaskPlugin=task/affinity

# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300

# SCHEDULING
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core

# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/none
ClusterName=cluster
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
#SlurmctldDebug=info
SlurmctldLogFile=/home/slurm/log/slurmctld.log
#SlurmdDebug=info
#SlurmdLogFile=

# COMPUTE NODES
NodeName=localhost CPUs=128 RealMemory=257682 Sockets=1 CoresPerSocket=64 ThreadsPerCore=2 State=UNKNOWN
PartitionName=full Nodes=localhost Default=YES MaxTime=INFINITE State=UP
PartitionName=half Nodes=localhost Default=NO MaxTime=INFINITE State=UP MaxNodes=1 MaxCPUsPerNode=64 MaxMemPerNode=128841

Here is my cgroup.conf file as well:
CgroupAutomount=yes
ConstrainCores=no
ConstrainRAMSpace=no

If anyone has any suggestions about what might be going wrong, and why the
script takes so much longer when run under Slurm, please let me know!

Best,
John
