Hi Mahmood,

Please try the following commands on rocks7:

systemctl restart slurmd

systemctl restart slurmctld

scontrol update node=rocks7 state=undrain


Best regards

Werner


On 05/06/2018 02:09 PM, Mahmood Naderan wrote:
Still, I think that for some reason Slurm puts the frontend in the
drain state. Maybe, in order not to overload the main node with user
jobs, it sets the state to drain, which is actually fake. I also checked
the commands used in the slurm roll (package from Werner) and nothing
was incorrect. It is similar to setting up Slurm manually on a cluster,
but this time with some automated scripts.


Regards,
Mahmood




On Sun, May 6, 2018 at 4:33 PM, Mahmood Naderan <mahmood...@gmail.com> wrote:
The chassis of the frontend is the same as that of the compute nodes: a
motherboard with two Opterons, each of which has 16 cores. However, the
head node is not included correctly, while the compute nodes are added
without any problem.

[root@rocks7 ~]# grep -R rocks7 /etc/slurm
/etc/slurm/partitions.conf.new:PartitionName=EMERALD
AllowAccounts=em1,em4 Nodes=compute-0-[2-4],rocks7
/etc/slurm/slurmdbd.conf:DbdHost=rocks7
/etc/slurm/head.conf:ControlMachine=rocks7
/etc/slurm/head.conf:DefaultStorageHost=rocks7
/etc/slurm/parts:PartitionName=EMERALD AllowAccounts=em1,em4
Nodes=compute-0-[2-4],rocks7
/etc/slurm/parts.conf:PartitionName=EMERALD AllowAccounts=em1,em4
Nodes=compute-0-[2-4],rocks7
/etc/slurm/slurm.conf:NodeName=rocks7 NodeAddr=10.1.1.1 CPUs=20
/etc/slurm/slurm.conf:PartitionName=DEFAULT AllocNodes=rocks7 State=UP
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]# slurmd -C rocks7
NodeName=rocks7 slurmd: Considering each NUMA node as a socket
CPUs=32 Boards=1 SocketsPerBoard=4 CoresPerSocket=8 ThreadsPerCore=1
RealMemory=64261
UpTime=23-02:45:32
[root@rocks7 ~]# slurmd -C compute-0-0
NodeName=rocks7 slurmd: Considering each NUMA node as a socket
CPUs=32 Boards=1 SocketsPerBoard=4 CoresPerSocket=8 ThreadsPerCore=1
RealMemory=64261
UpTime=23-02:45:36
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]# rocks run host compute-0-0 "lscpu"
Warning: untrusted X11 forwarding setup failed: xauth key data not generated
Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                32
On-line CPU(s) list:   0-31
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             2
NUMA node(s):          4
Vendor ID:             AuthenticAMD
CPU family:            21
Model:                 1
Model name:            AMD Opteron(tm) Processor 6282 SE
Stepping:              2
CPU MHz:               1400.000
CPU max MHz:           2600.0000
CPU min MHz:           1400.0000
BogoMIPS:              5200.27
Virtualization:        AMD-V
L1d cache:             16K
L1i cache:             64K
L2 cache:              2048K
L3 cache:              6144K
NUMA node0 CPU(s):     0-7
NUMA node1 CPU(s):     8-15
NUMA node2 CPU(s):     16-23
NUMA node3 CPU(s):     24-31
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx
mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc art rep_good nopl
nonstop_tsc extd_apicid amd_dcm aperfmperf pni pclmulqdq monitor ssse3
cx16 sse4_1 sse4_2 popcnt aes xsave avx lahf_lm cmp_legacy svm extapic
cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs xop skinit wdt
lwp fma4 nodeid_msr topoext perfctr_core perfctr_nb cpb hw_pstate arat
npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid
decodeassists pausefilter pfthreshold
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]# lscpu
Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                32
On-line CPU(s) list:   0-31
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             2
NUMA node(s):          4
Vendor ID:             AuthenticAMD
CPU family:            21
Model:                 2
Model name:            AMD Opteron(tm) Processor 6380
Stepping:              0
CPU MHz:               1400.000
CPU max MHz:           2500.0000
CPU min MHz:           1400.0000
BogoMIPS:              4999.86
Virtualization:        AMD-V
L1d cache:             16K
L1i cache:             64K
L2 cache:              2048K
L3 cache:              6144K
NUMA node0 CPU(s):     0-7
NUMA node1 CPU(s):     8-15
NUMA node2 CPU(s):     16-23
NUMA node3 CPU(s):     24-31
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx
mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc art rep_good nopl
nonstop_tsc extd_apicid amd_dcm aperfmperf pni pclmulqdq monitor ssse3
fma cx16 sse4_1 sse4_2 popcnt aes xsave avx f16c lahf_lm cmp_legacy
svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs
xop skinit wdt lwp fma4 tce nodeid_msr tbm topoext perfctr_core
perfctr_nb cpb hw_pstate bmi1 arat npt lbrv svm_lock nrip_save
tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold
[root@rocks7 ~]#
[root@rocks7 ~]#
[root@rocks7 ~]# scontrol show node rocks7,compute-0-0
NodeName=rocks7 Arch=x86_64 CoresPerSocket=1
    CPUAlloc=0 CPUErr=0 CPUTot=1 CPULoad=0.01
    AvailableFeatures=(null)
    ActiveFeatures=(null)
    Gres=(null)
    NodeAddr=10.1.1.1 NodeHostName=rocks7 Version=17.11
    OS=Linux 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20 20:32:50 UTC 2017
    RealMemory=64261 AllocMem=0 FreeMem=10242 Sockets=1 Boards=1
    State=IDLE+DRAIN ThreadsPerCore=1 TmpDisk=281775 Weight=1 Owner=N/A
MCS_label=N/A
    Partitions=WHEEL,EMERALD
    BootTime=2018-04-13T13:05:00 SlurmdStartTime=2018-04-13T13:05:17
    CfgTRES=cpu=1,mem=64261M,billing=1
    AllocTRES=
    CapWatts=n/a
    CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
    ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
    Reason=Low socket*core*thread count, Low CPUs [root@2018-05-05T21:49:45]

NodeName=compute-0-0 Arch=x86_64 CoresPerSocket=1
    CPUAlloc=0 CPUErr=0 CPUTot=32 CPULoad=0.01
    AvailableFeatures=rack-0,32CPUs
    ActiveFeatures=rack-0,32CPUs
    Gres=(null)
    NodeAddr=10.1.1.254 NodeHostName=compute-0-0 Version=17.11
    OS=Linux 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20 20:32:50 UTC 2017
    RealMemory=64261 AllocMem=0 FreeMem=63217 Sockets=32 Boards=1
    State=IDLE ThreadsPerCore=1 TmpDisk=444124 Weight=20511900
Owner=N/A MCS_label=N/A
    Partitions=CLUSTER,WHEEL,DIAMOND
    BootTime=2018-04-13T13:06:46 SlurmdStartTime=2018-05-05T21:17:51
    CfgTRES=cpu=32,mem=64261M,billing=47
    AllocTRES=
    CapWatts=n/a
    CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
    ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s


[root@rocks7 ~]#
Regards,
Mahmood


Reply via email to