This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 67b7128e8a [tools](tpcds) fix bug of generating and loading data (#17835) 67b7128e8a is described below commit 67b7128e8a26942cf9ecedd8e549bf22d818430e Author: Dongyang Li <hello_step...@qq.com> AuthorDate: Thu Mar 16 11:59:39 2023 +0800 [tools](tpcds) fix bug of generating and loading data (#17835) --------- Co-authored-by: stephen <hello_stephen@qq.com> --- tools/tpcds-tools/bin/gen-tpcds-data.sh | 20 ++++++++++++++++---- tools/tpcds-tools/bin/load-tpcds-data.sh | 6 +++--- tools/tpcds-tools/bin/run-tpcds-queries.sh | 4 ++-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/tools/tpcds-tools/bin/gen-tpcds-data.sh b/tools/tpcds-tools/bin/gen-tpcds-data.sh index e0dcea64d9..d18bb702f0 100755 --- a/tools/tpcds-tools/bin/gen-tpcds-data.sh +++ b/tools/tpcds-tools/bin/gen-tpcds-data.sh @@ -36,7 +36,7 @@ usage() { Usage: $0 <options> Optional options: -s scale factor, default is 1 - -c parallelism to generate data of (lineitem, orders, partsupp) table, default is 10 + -c parallelism to generate data, default is 10, max is 100 Eg. $0 generate data using default value. @@ -114,12 +114,24 @@ date cd "${TPCDS_DBGEN_DIR}" if [[ ${PARALLEL} -eq 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then echo "data genarated." -elif [[ ${PARALLEL} -gt 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then +elif [[ ${PARALLEL} -gt 1 ]] && [[ ${PARALLEL} -le 100 ]]; then + for c in $(seq 1 "${PARALLEL}"); do + "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -CHILD "${c}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}" & + done + wait echo "data genarated." else - echo "ERROR occured." && exit 1 + echo "ERROR: bad parallelism ${PARALLEL}" && exit 1 fi -cd - +cd "${TPCDS_DATA_DIR}" +echo "Convert encoding of customer table files from one iso-8859-1 to utf-8." 
+for i in $(seq 1 "${PARALLEL}"); do + if ! [[ -f "customer_${i}_${PARALLEL}.dat" ]]; then continue; fi + mv "customer_${i}_${PARALLEL}.dat" "customer_${i}_${PARALLEL}.dat.bak" + iconv -f iso-8859-1 -t utf-8 "customer_${i}_${PARALLEL}.dat.bak" -o "customer_${i}_${PARALLEL}.dat" + rm "customer_${i}_${PARALLEL}.dat.bak" +done date + # check data du -sh "${TPCDS_DATA_DIR}"/*.dat* diff --git a/tools/tpcds-tools/bin/load-tpcds-data.sh b/tools/tpcds-tools/bin/load-tpcds-data.sh index a127796705..117bc83821 100755 --- a/tools/tpcds-tools/bin/load-tpcds-data.sh +++ b/tools/tpcds-tools/bin/load-tpcds-data.sh @@ -168,7 +168,8 @@ for table_name in ${!table_columns[*]}; do # 要批量执行的命令放在大括号内, 后台运行 { - for file in "${TPCDS_DATA_DIR}/${table_name}"*.dat; do + for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do + if ! [[ -f "${file}" ]]; then continue; fi ret=$(curl \ --location-trusted \ -u "${USER}":"${PASSWORD:=}" \ @@ -179,10 +180,9 @@ for table_name in ${!table_columns[*]}; do if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then echo "----loaded ${file}" else - echo -e "\033[31m----load ${file} FAIL...\033[0m" + echo -e "\033[31m----load ${file} FAIL...\n${ret}\033[0m" fi done - sleep 2 # 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行 echo >&3 } & diff --git a/tools/tpcds-tools/bin/run-tpcds-queries.sh b/tools/tpcds-tools/bin/run-tpcds-queries.sh index 46cc77db86..87ea92f1e0 100755 --- a/tools/tpcds-tools/bin/run-tpcds-queries.sh +++ b/tools/tpcds-tools/bin/run-tpcds-queries.sh @@ -17,7 +17,7 @@ # under the License. 
############################################################## -# This script is used to run TPC-DS 99 queries +# This script is used to run TPC-DS 103 queries ############################################################## set -eo pipefail @@ -33,7 +33,7 @@ TPCDS_QUERIES_DIR="${CURDIR}/../queries" usage() { echo " -This script is used to run TPC-DS 99 queries, +This script is used to run TPC-DS 103 queries, will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file. Usage: $0 " --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org