The chassis of the frontend is the same as that of the compute nodes: a motherboard with two Opterons, each with 16 cores. However, the head node is not included correctly, while the compute nodes are added without any problem.
[root@rocks7 ~]# grep -R rocks7 /etc/slurm /etc/slurm/partitions.conf.new:PartitionName=EMERALD AllowAccounts=em1,em4 Nodes=compute-0-[2-4],rocks7 /etc/slurm/slurmdbd.conf:DbdHost=rocks7 /etc/slurm/head.conf:ControlMachine=rocks7 /etc/slurm/head.conf:DefaultStorageHost=rocks7 /etc/slurm/parts:PartitionName=EMERALD AllowAccounts=em1,em4 Nodes=compute-0-[2-4],rocks7 /etc/slurm/parts.conf:PartitionName=EMERALD AllowAccounts=em1,em4 Nodes=compute-0-[2-4],rocks7 /etc/slurm/slurm.conf:NodeName=rocks7 NodeAddr=10.1.1.1 CPUs=20 /etc/slurm/slurm.conf:PartitionName=DEFAULT AllocNodes=rocks7 State=UP [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# slurmd -C rocks7 NodeName=rocks7 slurmd: Considering each NUMA node as a socket CPUs=32 Boards=1 SocketsPerBoard=4 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64261 UpTime=23-02:45:32 [root@rocks7 ~]# slurmd -C compute-0-0 NodeName=rocks7 slurmd: Considering each NUMA node as a socket CPUs=32 Boards=1 SocketsPerBoard=4 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64261 UpTime=23-02:45:36 [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# rocks run host compute-0-0 "lscpu" Warning: untrusted X11 forwarding setup failed: xauth key data not generated Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 32 On-line CPU(s) list: 0-31 Thread(s) per core: 2 Core(s) per socket: 8 Socket(s): 2 NUMA node(s): 4 Vendor ID: AuthenticAMD CPU family: 21 Model: 1 Model name: AMD Opteron(tm) Processor 6282 SE Stepping: 2 CPU MHz: 1400.000 CPU max MHz: 2600.0000 CPU min MHz: 1400.0000 BogoMIPS: 5200.27 Virtualization: AMD-V L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 6144K NUMA node0 CPU(s): 0-7 NUMA node1 CPU(s): 8-15 NUMA node2 CPU(s): 16-23 NUMA node3 CPU(s): 24-31 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc art rep_good nopl 
nonstop_tsc extd_apicid amd_dcm aperfmperf pni pclmulqdq monitor ssse3 cx16 sse4_1 sse4_2 popcnt aes xsave avx lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs xop skinit wdt lwp fma4 nodeid_msr topoext perfctr_core perfctr_nb cpb hw_pstate arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 32 On-line CPU(s) list: 0-31 Thread(s) per core: 2 Core(s) per socket: 8 Socket(s): 2 NUMA node(s): 4 Vendor ID: AuthenticAMD CPU family: 21 Model: 2 Model name: AMD Opteron(tm) Processor 6380 Stepping: 0 CPU MHz: 1400.000 CPU max MHz: 2500.0000 CPU min MHz: 1400.0000 BogoMIPS: 4999.86 Virtualization: AMD-V L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 6144K NUMA node0 CPU(s): 0-7 NUMA node1 CPU(s): 8-15 NUMA node2 CPU(s): 16-23 NUMA node3 CPU(s): 24-31 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc art rep_good nopl nonstop_tsc extd_apicid amd_dcm aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 popcnt aes xsave avx f16c lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs xop skinit wdt lwp fma4 tce nodeid_msr tbm topoext perfctr_core perfctr_nb cpb hw_pstate bmi1 arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold [root@rocks7 ~]# [root@rocks7 ~]# [root@rocks7 ~]# scontrol show node rocks7,compute-0-0 NodeName=rocks7 Arch=x86_64 CoresPerSocket=1 CPUAlloc=0 CPUErr=0 CPUTot=1 CPULoad=0.01 AvailableFeatures=(null) ActiveFeatures=(null) Gres=(null) NodeAddr=10.1.1.1 NodeHostName=rocks7 Version=17.11 OS=Linux 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20 20:32:50 UTC 2017 RealMemory=64261 AllocMem=0 FreeMem=10242 Sockets=1 
Boards=1 State=IDLE+DRAIN ThreadsPerCore=1 TmpDisk=281775 Weight=1 Owner=N/A MCS_label=N/A Partitions=WHEEL,EMERALD BootTime=2018-04-13T13:05:00 SlurmdStartTime=2018-04-13T13:05:17 CfgTRES=cpu=1,mem=64261M,billing=1 AllocTRES= CapWatts=n/a CurrentWatts=0 LowestJoules=0 ConsumedJoules=0 ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s Reason=Low socket*core*thread count, Low CPUs [root@2018-05-05T21:49:45] NodeName=compute-0-0 Arch=x86_64 CoresPerSocket=1 CPUAlloc=0 CPUErr=0 CPUTot=32 CPULoad=0.01 AvailableFeatures=rack-0,32CPUs ActiveFeatures=rack-0,32CPUs Gres=(null) NodeAddr=10.1.1.254 NodeHostName=compute-0-0 Version=17.11 OS=Linux 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20 20:32:50 UTC 2017 RealMemory=64261 AllocMem=0 FreeMem=63217 Sockets=32 Boards=1 State=IDLE ThreadsPerCore=1 TmpDisk=444124 Weight=20511900 Owner=N/A MCS_label=N/A Partitions=CLUSTER,WHEEL,DIAMOND BootTime=2018-04-13T13:06:46 SlurmdStartTime=2018-05-05T21:17:51 CfgTRES=cpu=32,mem=64261M,billing=47 AllocTRES= CapWatts=n/a CurrentWatts=0 LowestJoules=0 ConsumedJoules=0 ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s [root@rocks7 ~]# Regards, Mahmood On Sun, May 6, 2018 at 4:23 PM, Chris Samuel <ch...@csamuel.org> wrote: > On Sunday, 6 May 2018 7:28:55 PM AEST Mahmood Naderan wrote: > >> I also have noticed that State returned back to IDLE+DRAIN! > > Both you and Eric are having issues with Opteron 6300 series CPUs. > > I can't help but think the fact that each package in a socket has 2 NUMA nodes > is the cause of your pain. So whilst Slurm says it's treating each NUMA node > as a socket I wonder if at some point it's getting confused whether the number > of sockets is really 2 or 4? > >> I am guessing to set Sockets to 32!! > > No, that's definitely wrong. > > What does this say? > > grep -R rocks7 /etc/slurm > > All the best, > Chris > -- > Chris Samuel : http://www.csamuel.org/ : Melbourne, VIC >