Hi all, background:
1. lustre(2.15.5) + corosync(3.1.5) + pacemaker(2.1.0-8.el8) + pcs(0.10.8) 2. there are 11 nodes in total, divided into 3 groups. If a node fails within a group, the resources can only be taken over by nodes within that group. 3. Each node has 2 MDTs and 16 OSTs. Issues: 1. The resource configuration time progressively increases. the second mdt-0 cost only 8s,the last ost-175 cost 1min:37s 2. The total time taken for the configuration is approximately 2 hours and 31 minutes. Is there a way to improve it? attachment: create bash: pcs_create.sh create log: pcs_create.log
#!/bin/bash
source /opt/storage/lustre/conf/install-pcs.conf #请根据实际情况调整相关内容
input_file="/opt/storage/lustre/conf/lustre-nvme.info" # 定义输入文件
echo "pcs create begin ($(date))"
# 将 mgs_nodes 转换为数组
IFS=',' read -ra mgs_array <<< "$mgs_nodes"
# 使用 IFS 和 read 命令将 host_groups 按分号拆分为数组
IFS=';' read -ra host_groups_array <<< "$host_groups"
# 获取总的组个数
total_groups=${#host_groups_array[@]}
total_hosts=0
for group in "${host_groups_array[@]}"; do
IFS=',' read -ra nodes <<< "$group"
total_hosts=$((total_hosts + ${#nodes[@]}))
done
all_nodes=()
# 遍历每个主机组并将主机添加到all_nodes数组中
for group in "${host_groups_array[@]}"; do
# 将每个组按逗号分割为单独的主机
IFS=',' read -ra nodes <<< "$group"
all_nodes+=("${nodes[@]}")
done
# 逐行读取文件
while read -r line; do
# 跳过标题行和空行
if [[ "$line" =~ ^(lustre_name|service|$) ]]; then
continue
fi
# 使用awk提取字段
role=$(echo "$line" | awk '{print $1}')
vol_name=$(echo "$line" | awk '{print $3}')
dev_name=$(echo "$line" | awk '{print $4}')
# 从vol_name提取最后的数字
last_number=$(echo "$vol_name" | grep -o -E '[0-9]+$')
# 根据角色生成目录名称和pcs命令
case "$role" in
MGS)
echo mgs
directory="/lustre/mgs"
pcs resource create mgs "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
pcs resource update mgs op start timeout=300s stop timeout=300s monitor timeout=300s
pcs resource update mgs meta migration-threshold=0
first_node=${mgs_array[0]}
pcs constraint location mgs prefers $first_node=2000
for node in "${all_nodes[@]}"; do
if [[ ! " ${mgs_array[*]} " =~ " $node " ]]; then
pcs constraint location mgs avoids $node
fi
done
pcs resource enable mgs
;;
MDS)
echo mdt-$last_number
echo "1 ($(date))"
directory="/lustre/mdt-$last_number"
pcs resource create mdt-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
pcs resource update mdt-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s
pcs resource update mdt-$last_number meta migration-threshold=0
pcs constraint order start mgs then mdt-$last_number kind=Mandatory symmetrical=false > /dev/null
echo "2 ($(date))"
if [[ $first_node_have_mdt == "false" ]]; then
host_number=$(( last_number % (total_hosts-1) ))
host_number=$((host_number+1))
else
host_number=$(( last_number % total_hosts ))
fi
host_node="${all_nodes[$host_number]}"
#避免在第一个主机上运行
if [[ $first_node_have_mdt == "false" ]]; then
group="${host_groups_array[0]}"
IFS=',' read -ra nodes <<< "$group"
if [[ " ${nodes[*]} " == *" $host_node "* ]]; then
first_node=${all_nodes[0]}
pcs constraint location mdt-$last_number avoids $first_node
fi
fi
echo "3 ($(date))"
# 配置位置限制
pcs constraint location mdt-$last_number prefers $host_node=600
for i in "${!host_groups_array[@]}"; do
group="${host_groups_array[$i]}"
IFS=',' read -ra nodes <<< "$group"
if [[ " ${nodes[*]} " != *" $host_node "* ]]; then
for node in "${nodes[@]}"; do
pcs constraint location mdt-$last_number avoids $node
done
fi
done
echo "4 ($(date))"
pcs resource enable mdt-$last_number
echo "5 ($(date))"
;;
OSS)
echo ost-$last_number
echo "1 ($(date))"
directory="/lustre/ost-$last_number"
pcs resource create ost-$last_number "ocf:heartbeat:Filesystem" device="$dev_name" directory="$directory" fstype="lustre" --disabled
pcs resource update ost-$last_number op start timeout=300s stop timeout=300s monitor timeout=300s
pcs resource update ost-$last_number meta migration-threshold=0
pcs constraint order start mgs then ost-$last_number kind=Mandatory symmetrical=false > /dev/null
host_number=$(( last_number % total_hosts )) # 计算资源应分配的主机序号
host_node="${all_nodes[$host_number]}"
echo "2 ($(date))"
# 配置位置限制
pcs constraint location ost-$last_number prefers $host_node=600
for i in "${!host_groups_array[@]}"; do
group="${host_groups_array[$i]}"
IFS=',' read -ra nodes <<< "$group"
if [[ " ${nodes[*]} " != *" $host_node "* ]]; then
for node in "${nodes[@]}"; do
pcs constraint location ost-$last_number avoids $node
done
fi
done
echo "3 ($(date))"
pcs resource enable ost-$last_number
echo "4 ($(date))"
;;
*)
echo "未知角色:$role"
;;
esac
done < "$input_file"
echo "pcs create end ($(date))"
pcs_create.log
Description: Binary data
_______________________________________________ Manage your subscription: https://lists.clusterlabs.org/mailman/listinfo/users ClusterLabs home: https://www.clusterlabs.org/
