This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 67b7128e8a [tools](tpcds) fix bug of generating and loading data (#17835)
67b7128e8a is described below

commit 67b7128e8a26942cf9ecedd8e549bf22d818430e
Author: Dongyang Li <hello_step...@qq.com>
AuthorDate: Thu Mar 16 11:59:39 2023 +0800

    [tools](tpcds) fix bug of generating and loading data (#17835)
    
    
    
    ---------
    
    Co-authored-by: stephen <hello_stephen@qq.com>
---
 tools/tpcds-tools/bin/gen-tpcds-data.sh    | 20 ++++++++++++++++----
 tools/tpcds-tools/bin/load-tpcds-data.sh   |  6 +++---
 tools/tpcds-tools/bin/run-tpcds-queries.sh |  4 ++--
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/tools/tpcds-tools/bin/gen-tpcds-data.sh b/tools/tpcds-tools/bin/gen-tpcds-data.sh
index e0dcea64d9..d18bb702f0 100755
--- a/tools/tpcds-tools/bin/gen-tpcds-data.sh
+++ b/tools/tpcds-tools/bin/gen-tpcds-data.sh
@@ -36,7 +36,7 @@ usage() {
 Usage: $0 <options>
   Optional options:
      -s             scale factor, default is 1
-     -c             parallelism to generate data of (lineitem, orders, partsupp) table, default is 10
+     -c             parallelism to generate data, default is 10, max is 100
 
   Eg.
     $0              generate data using default value.
@@ -114,12 +114,24 @@ date
 cd "${TPCDS_DBGEN_DIR}"
 if [[ ${PARALLEL} -eq 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
     echo "data genarated."
-elif [[ ${PARALLEL} -gt 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
+elif [[ ${PARALLEL} -gt 1 ]] && [[ ${PARALLEL} -le 100 ]]; then
+    for c in $(seq 1 "${PARALLEL}"); do
+        "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -CHILD "${c}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}" &
+    done
+    wait
     echo "data genarated."
 else
-    echo "ERROR occured." && exit 1
+    echo "ERROR: bad parallelism ${PARALLEL}" && exit 1
 fi
-cd -
+cd "${TPCDS_DATA_DIR}"
+echo "Convert encoding of customer table files from one iso-8859-1 to utf-8."
+for i in $(seq 1 "${PARALLEL}"); do
+    if ! [[ -f "customer_${i}_${PARALLEL}.dat" ]]; then continue; fi
+    mv "customer_${i}_${PARALLEL}.dat" "customer_${i}_${PARALLEL}.dat.bak"
+    iconv -f iso-8859-1 -t utf-8 "customer_${i}_${PARALLEL}.dat.bak" -o "customer_${i}_${PARALLEL}.dat"
+    rm "customer_${i}_${PARALLEL}.dat.bak"
+done
 date
+
 # check data
 du -sh "${TPCDS_DATA_DIR}"/*.dat*
diff --git a/tools/tpcds-tools/bin/load-tpcds-data.sh b/tools/tpcds-tools/bin/load-tpcds-data.sh
index a127796705..117bc83821 100755
--- a/tools/tpcds-tools/bin/load-tpcds-data.sh
+++ b/tools/tpcds-tools/bin/load-tpcds-data.sh
@@ -168,7 +168,8 @@ for table_name in ${!table_columns[*]}; do
 
     # 要批量执行的命令放在大括号内, 后台运行
     {
-        for file in "${TPCDS_DATA_DIR}/${table_name}"*.dat; do
+        for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do
+            if ! [[ -f "${file}" ]]; then continue; fi
             ret=$(curl \
                 --location-trusted \
                 -u "${USER}":"${PASSWORD:=}" \
@@ -179,10 +180,9 @@ for table_name in ${!table_columns[*]}; do
             if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
                 echo "----loaded ${file}"
             else
-                echo -e "\033[31m----load ${file} FAIL...\033[0m"
+                echo -e "\033[31m----load ${file} FAIL...\n${ret}\033[0m"
             fi
         done
-        sleep 2
         # 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
         echo >&3
     } &
diff --git a/tools/tpcds-tools/bin/run-tpcds-queries.sh b/tools/tpcds-tools/bin/run-tpcds-queries.sh
index 46cc77db86..87ea92f1e0 100755
--- a/tools/tpcds-tools/bin/run-tpcds-queries.sh
+++ b/tools/tpcds-tools/bin/run-tpcds-queries.sh
@@ -17,7 +17,7 @@
 # under the License.
 
 ##############################################################
-# This script is used to run TPC-DS 99 queries
+# This script is used to run TPC-DS 103 queries
 ##############################################################
 
 set -eo pipefail
@@ -33,7 +33,7 @@ TPCDS_QUERIES_DIR="${CURDIR}/../queries"
 
 usage() {
     echo "
-This script is used to run TPC-DS 99 queries, 
+This script is used to run TPC-DS 103 queries, 
 will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file.
 Usage: $0 
   "


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to