This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f640e87d8a96254bbef5de372c9bdbb99881e379
Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
AuthorDate: Sat Sep 21 08:38:48 2024 +0800

    [fix](deploy) refine fdb_ctl.sh (#41070)
---
 build.sh              |   3 +
 tools/fdb/fdb_ctl.sh  | 151 ++++++++++++++++++++++++++++++++++++++------------
 tools/fdb/fdb_vars.sh |  35 ++++++++++--
 3 files changed, 147 insertions(+), 42 deletions(-)

diff --git a/build.sh b/build.sh
index 2eca69a17a6..a4f9c329cb9 100755
--- a/build.sh
+++ b/build.sh
@@ -883,6 +883,9 @@ if [[ ${BUILD_CLOUD} -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/cloud/output" "${DORIS_HOME}/output/ms"
 fi
 
+mkdir -p "${DORIS_HOME}/output/tools"
+cp -r -p tools/fdb "${DORIS_HOME}/output/tools"
+
 echo "***************************************"
 echo "Successfully build Doris"
 echo "***************************************"
diff --git a/tools/fdb/fdb_ctl.sh b/tools/fdb/fdb_ctl.sh
index 9c809abd5d4..09aaaaf3f2a 100755
--- a/tools/fdb/fdb_ctl.sh
+++ b/tools/fdb/fdb_ctl.sh
@@ -77,7 +77,7 @@ function ensure_port_is_listenable() {
 
 function download_fdb() {
     if [[ -d "${FDB_PKG_DIR}" ]]; then
-        echo "FDB ${FDB_VERSION} already exists"
+        echo "FDB package for ${FDB_VERSION} already exists"
         return
     fi
 
@@ -135,37 +135,94 @@ get_fdb_mode() {
 
 # Function to calculate number of processes
 calculate_process_numbers() {
-    # local memory_gb=$1
-    local cpu_cores=$2
+    local memory_limit_gb=$1
+    local cpu_cores_limit=$2
 
-    local min_processes=1
     local data_dir_count
 
     # Convert comma-separated DATA_DIRS into an array
     IFS=',' read -r -a DATA_DIR_ARRAY <<<"${DATA_DIRS}"
     data_dir_count=${#DATA_DIR_ARRAY[@]}
 
-    # Stateless processes (at least 1, up to 1/4 of CPU cores)
-    local stateless_processes=$((cpu_cores / 4))
-    [[ ${stateless_processes} -lt ${min_processes} ]] && stateless_processes=${min_processes}
+    # Parse the ratio input
+    IFS=':' read -r num_storage num_stateless num_log <<<"${STORAGE_STATELESS_LOG_RATIO}"
 
-    # Storage processes (must be a multiple of the number of data directories)
-    local storage_processes=$((cpu_cores / 4))
-    [[ ${storage_processes} -lt ${data_dir_count} ]] && storage_processes=${data_dir_count}
-    storage_processes=$(((storage_processes / data_dir_count) * data_dir_count))
+    # Initialize process counts
+    local storage_processes=0   # Storage processes
+    local stateless_processes=0 # Stateless processes
+    local log_processes=0       # Log processes
 
-    # Transaction processes (must be a multiple of the number of data directories)
-    local transaction_processes=$((cpu_cores / 8))
-    [[ ${transaction_processes} -lt ${min_processes} ]] && transaction_processes=${min_processes}
-    [[ ${transaction_processes} -lt ${data_dir_count} ]] && transaction_processes=${data_dir_count}
-    transaction_processes=$(((transaction_processes / data_dir_count) * data_dir_count))
+    local storage_process_num_limit=$((STORAGE_PROCESSES_NUM_PER_SSD * data_dir_count))
+    local log_process_num_limit=$((LOG_PROCESSES_NUM_PER_SSD * data_dir_count))
+
+    if [[ "#${MEDIUM_TYPE}" = "#HDD" ]]; then
+        storage_process_num_limit=$((STORAGE_PROCESSES_NUM_PER_HDD * data_dir_count))
+        log_process_num_limit=$((LOG_PROCESSES_NUM_PER_HDD * data_dir_count))
+    fi
+
+    # Find maximum number of processes while maintaining the specified ratio
+    while true; do
+        # Calculate process counts based on the ratio
+        storage_processes=$((storage_processes + num_storage))
+        stateless_processes=$((storage_processes * num_stateless / num_storage))
+        log_processes=$((storage_processes * num_log / num_storage))
+
+        # Calculate total CPUs used
+        local total_cpu_used=$((storage_processes + stateless_processes + log_processes))
+
+        # Check memory constraint
+        local total_memory_used=$(((MEMORY_STORAGE_GB * storage_processes) + (MEMORY_STATELESS_GB * stateless_processes) + (MEMORY_LOG_GB * log_processes)))
+
+        # Check datadir limits
+        if ((storage_processes > storage_process_num_limit || log_processes > log_process_num_limit)); then
+            break
+        fi
+
+        # Check overall constraints
+        if ((total_memory_used <= memory_limit_gb && total_cpu_used <= cpu_cores_limit)); then
+            continue
+        else
+            # If constraints are violated, revert back
+            storage_processes=$((storage_processes - num_storage))
+            stateless_processes=$((storage_processes * num_stateless / num_storage))
+            log_processes=$((storage_processes * num_log / num_storage))
+            break
+        fi
+    done
 
     # Return the values
-    echo "${stateless_processes} ${storage_processes} ${transaction_processes}"
+    echo "${stateless_processes} ${storage_processes} ${log_processes}"
+}
+
+function check_vars() {
+    IFS=',' read -r -a IPS <<<"${FDB_CLUSTER_IPS}"
+
+    command -v ping || echo "ping is not available to check whether the machines are reachable, please install ping."
+
+    for IP_ADDRESS in "${IPS[@]}"; do
+        if ping -c 1 "${IP_ADDRESS}" &>/dev/null; then
+            echo "${IP_ADDRESS} is reachable"
+        else
+            echo "${IP_ADDRESS} is not reachable"
+            exit 1
+        fi
+    done
+
+    if [[ ${CPU_CORES_LIMIT} -gt $(nproc) ]]; then
+        echo "CPU_CORES_LIMIT beyonds number of machine, which is $(nproc)"
+        exit 1
+    fi
+
+    if [[ ${MEMORY_LIMIT_GB} -gt $(free -g | awk '/^Mem:/{print $2}') ]]; then
+        echo "MEMORY_LIMIT_GB beyonds memory of machine, which is $(free -g | 
awk '/^Mem:/{print $2}')"
+        exit 1
+    fi
 }
 
 function deploy_fdb() {
+    check_vars
     download_fdb
+    check_fdb_running
 
     ln -sf "${FDB_PKG_DIR}/fdbserver" "${FDB_HOME}/fdbserver"
     ln -sf "${FDB_PKG_DIR}/fdbmonitor" "${FDB_HOME}/fdbmonitor"
@@ -178,6 +235,10 @@ function deploy_fdb() {
     IFS=',' read -r -a DATA_DIR_ARRAY <<<"${DATA_DIRS}"
     for DIR in "${DATA_DIR_ARRAY[@]}"; do
         mkdir -p "${DIR}" || handle_error "Failed to create data directory ${DIR}"
+        if [[ -n "$(ls -A "${DIR}")" ]]; then
+            echo "Error: ${DIR} is not empty. DO NOT run deploy on a node 
running fdb. If you are sure that the node is not in a fdb cluster, run 
fdb_ctl.sh clean."
+            exit 1
+        fi
     done
 
     echo -e "\tCreate fdb.cluster, coordinator: $(get_coordinators)"
@@ -210,7 +271,14 @@ EOF
     CPU_CORES_LIMIT=${CPU_CORES_LIMIT:-1}
 
     # Calculate number of processes based on resources and data directories
-    read -r stateless_processes storage_processes transaction_processes <<<"$(calculate_process_numbers "${MEMORY_LIMIT_GB}" "${CPU_CORES_LIMIT}")"
+    read -r stateless_processes storage_processes log_processes <<<"$(calculate_process_numbers "${MEMORY_LIMIT_GB}" "${CPU_CORES_LIMIT}")"
+    echo "stateless process num : ${stateless_processes}, storage_processes : 
${storage_processes}, log_processes : ${log_processes}"
+    if [[ ${storage_processes} -eq 0 ]]; then
+        # Add one process
+        PORT=$((FDB_PORT))
+        echo "[fdbserver.${PORT}]
+" >>"${FDB_HOME}/conf/fdb.conf"
+    fi
 
     # Add stateless processes
     for ((i = 0; i < stateless_processes; i++)); do
@@ -233,12 +301,12 @@ datadir = ${DATA_DIR_ARRAY[${DIR_INDEX}]}/${PORT}" | tee -a "${FDB_HOME}/conf/fd
 
     FDB_PORT=$((FDB_PORT + storage_processes))
 
-    # Add transaction processes
-    for ((i = 0; i < transaction_processes; i++)); do
+    # Add log processes
+    for ((i = 0; i < log_processes; i++)); do
         PORT=$((FDB_PORT + i))
         DIR_INDEX=$((i % STORAGE_DIR_COUNT))
         echo "[fdbserver.${PORT}]
-class = transaction
+class = log
datadir = ${DATA_DIR_ARRAY[${DIR_INDEX}]}/${PORT}" | tee -a "${FDB_HOME}/conf/fdb.conf" >/dev/null
     done
 
@@ -250,6 +318,8 @@ logdir = ${LOG_DIR}" >>"${FDB_HOME}/conf/fdb.conf"
 }
 
 function start_fdb() {
+    check_fdb_running
+
     if [[ ! -f "${FDB_HOME}/fdbmonitor" ]]; then
         echo 'Please run setup before start fdb server'
         exit 1
@@ -275,6 +345,18 @@ function stop_fdb() {
     fi
 }
 
+function check_fdb_running() {
+    if [[ -f "${FDB_HOME}/fdbmonitor.pid" ]]; then
+        local fdb_pid
+
+        fdb_pid=$(cat "${FDB_HOME}/fdbmonitor.pid")
+        if ps -p "${fdb_pid}" >/dev/null; then
+            echo "fdbmonitor with pid ${fdb_pid} is running, stop it first."
+            exit 1
+        fi
+    fi
+}
+
 function clean_fdb() {
     if [[ -f "${FDB_HOME}/fdbmonitor.pid" ]]; then
         local fdb_pid
@@ -307,8 +389,6 @@ function clean_fdb() {
 
 function deploy() {
     local job="$1"
-    local skip_pkg="$2"
-    local skip_config="$3"
 
     if [[ ${job} =~ ^(all|fdb)$ ]]; then
         deploy_fdb
@@ -324,16 +404,21 @@ function start() {
     fi
 
     if [[ ${init} =~ ^(all|fdb)$ ]]; then
-        echo "Try create database ..."
         local fdb_mode
 
         fdb_mode=$(get_fdb_mode)
+
+        echo "Try create database in fdb ${fdb_mode}"
+
         "${FDB_HOME}/fdbcli" -C "${FDB_HOME}/conf/fdb.cluster" \
-            --exec "configure new ${fdb_mode} ssd" || true
+            --exec "configure new ${fdb_mode} ssd" ||
+            "${FDB_HOME}/fdbcli" -C "${FDB_HOME}/conf/fdb.cluster" --exec 
"status" ||
+            (echo "failed to start fdb, please check that all nodes have same 
FDB_CLUSTER_ID" &&
+                exit 1)
     fi
 
-    echo "Start fdb success, and the cluster is:"
-    cat "${FDB_HOME}/conf/fdb.cluster"
+    echo "Start fdb success, and you can set conf for MetaService:"
+    echo "fdb_cluster = $(cat "${FDB_HOME}"/conf/fdb.cluster)"
 }
 
 function stop() {
@@ -359,16 +444,12 @@ function status() {
 }
 
 function usage() {
-    echo "Usage: $0 <CMD> [--skip-pkg] [--skip-config]"
+    echo "Usage: $0 <CMD> "
     echo -e "\t deploy \t setup fdb env (dir, binary, conf ...)"
     echo -e "\t clean  \t clean fdb data"
     echo -e "\t start  \t start fdb"
     echo -e "\t stop   \t stop fdb"
-    echo -e ""
-    echo -e ""
-    echo -e "Args:"
-    echo -e "\t --skip-pkg    \t skip to update binary pkgs during deploy"
-    echo -e "\t --skip-config \t skip to update config during deploy"
+    echo -e "\t fdbcli \t stop fdb"
     echo -e ""
     exit 1
 }
@@ -390,12 +471,10 @@ shift
 job="fdb"
 
 init="fdb"
-skip_pkg="false"
-skip_config="false"
 
 case ${cmd} in
 deploy)
-    deploy "${job}" "${skip_pkg}" "${skip_config}"
+    deploy "${job}"
     ;;
 start)
     start "${job}" "${init}"
diff --git a/tools/fdb/fdb_vars.sh b/tools/fdb/fdb_vars.sh
index c0bbadabdd6..0d4cc1667bc 100644
--- a/tools/fdb/fdb_vars.sh
+++ b/tools/fdb/fdb_vars.sh
@@ -25,13 +25,15 @@
 # shellcheck disable=2034
 DATA_DIRS="/mnt/foundationdb/data1,/mnt/foundationdb/data2,/mnt/foundationdb/data3"
 
+MEDIUM_TYPE="SSD"
+
 # Define the cluster IPs (comma-separated list of IP addresses)
 # You should have at least 3 IP addresses for a production cluster
 # The first IP addresses will be used as the coordinator,
 # num of coordinators depends on the number of nodes, see the function get_coordinators.
 # For high availability, machines should be in diffrent rack.
 # shellcheck disable=2034
-FDB_CLUSTER_IPS="172.200.0.2,172.200.0.3,172.200.0.4"
+FDB_CLUSTER_IPS="172.200.0.5,172.200.0.6,172.200.0.7"
 
 # Define the FoundationDB home directory, which contains the fdb binaries and logs.
 # default is /fdbhome and have to be absolute path.
@@ -41,23 +43,23 @@ FDB_HOME="/fdbhome"
 # Define the cluster id, shoule be generated random like mktemp -u XXXXXXXX,
 # have to be different for each cluster.
 # shellcheck disable=2034
-FDB_CLUSTER_ID=$(mktemp -u XXXXXXXX)
+FDB_CLUSTER_ID="ra7eOp7x"
 
 # Define the cluster description, you 'd better to change it.
 # shellcheck disable=2034
 FDB_CLUSTER_DESC="mycluster"
 
-#======================= OPTIONAL CUSTOMIZATION ============================
 # Define resource limits
 # Memory limit in gigabytes
 # shellcheck disable=2034
-MEMORY_LIMIT_GB=16
+MEMORY_LIMIT_GB=64
 
 # CPU cores limit
 # shellcheck disable=2034
-CPU_CORES_LIMIT=8
+CPU_CORES_LIMIT=16
+
+#======================= OPTIONAL CUSTOMIZATION ============================
 
-#===========================================================================
 # Define starting port for the servers
 # This is the base port number for the fdbserver processes, usually does not need to be changed
 # shellcheck disable=2034
@@ -70,3 +72,24 @@ FDB_VERSION="7.1.38"
 # Users who run the fdb processes, default is the current user
 # shellcheck disable=2034
 USER=$(whoami)
+
+# ratio of storage, stateless and log process num in fdb
+# shellcheck disable=2034
+STORAGE_STATELESS_LOG_RATIO="2:1:1"
+
+# Set process limits
+# shellcheck disable=2034
+STORAGE_PROCESSES_NUM_PER_HDD=1
+# shellcheck disable=2034
+LOG_PROCESSES_NUM_PER_HDD=1
+# shellcheck disable=2034
+STORAGE_PROCESSES_NUM_PER_SSD=4
+# shellcheck disable=2034
+LOG_PROCESSES_NUM_PER_SSD=4
+
+# shellcheck disable=2034
+MEMORY_STORAGE_GB=8
+# shellcheck disable=2034
+MEMORY_STATELESS_GB=1
+# shellcheck disable=2034
+MEMORY_LOG_GB=2
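
A quick post-start sanity check, sketched here for convenience and assuming the default FDB_HOME=/fdbhome (the fdbcli invocation is the same one fdb_ctl.sh uses):

    # verify the cluster after ./fdb_ctl.sh start
    /fdbhome/fdbcli -C /fdbhome/conf/fdb.cluster --exec "status"
    # the connection string printed by "start" is what goes into the MetaService config:
    #   fdb_cluster = <contents of /fdbhome/conf/fdb.cluster>
    cat /fdbhome/conf/fdb.cluster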

