This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7a357426257 [Enhancement](tools) Support transaction for benchmarks loading (#31126) 7a357426257 is described below commit 7a3574262573a9ce202429b32462a539150bd81c Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Tue Feb 20 14:29:34 2024 +0800 [Enhancement](tools) Support transaction for benchmarks loading (#31126) --- tools/clickbench-tools/load-clickbench-data.sh | 37 ++++-- tools/ssb-tools/bin/load-ssb-data.sh | 169 +++++++++++++++++++++---- tools/tpcds-tools/bin/load-tpcds-data.sh | 40 ++++-- tools/tpch-tools/bin/load-tpch-data.sh | 123 ++++++++++++++---- 4 files changed, 299 insertions(+), 70 deletions(-) diff --git a/tools/clickbench-tools/load-clickbench-data.sh b/tools/clickbench-tools/load-clickbench-data.sh index aff122edff4..41d17a8a76e 100755 --- a/tools/clickbench-tools/load-clickbench-data.sh +++ b/tools/clickbench-tools/load-clickbench-data.sh @@ -30,13 +30,18 @@ ROOT=$( CURDIR=${ROOT} DATA_DIR=$CURDIR/ -# DATA_DIR=/mnt/disk1/stephen/data/clickbench usage() { echo " This script is used to load ClickBench data, will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file. -Usage: $0 +Usage: $0 <options> + Optional options: + -x use transaction id. multi times of loading with the same id won't load duplicate data. + + Eg. + $0 load data using default value. + $0 -x blabla use transaction id \"blabla\". " exit 1 } @@ -44,11 +49,13 @@ Usage: $0 OPTS=$(getopt \ -n $0 \ -o '' \ - -o 'h' \ + -o 'hx:' \ -- "$@") eval set -- "$OPTS" HELP=0 +TXN_ID="" + while true; do case "$1" in -h) @@ -59,6 +66,10 @@ while true; do shift break ;; + -x) + TXN_ID=$2 + shift 2 + ;; *) echo "Internal error" exit 1 @@ -113,7 +124,6 @@ function load() { if [ ! -f "$DATA_DIR/hits_split${i}" ]; then echo "will download hits_split${i} to $DATA_DIR" wget --continue "https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench/hits_split${i}" & - # wget --continue "https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench/hits_split${i}" & PID=$! wget_pids[${#wget_pids[@]}]=$PID fi @@ -127,11 +137,20 @@ function load() { for i in $(seq 0 9); do echo -e " start loading hits_split${i}" - curl --location-trusted \ - -u $USER:$PASSWORD \ - -T "$DATA_DIR/hits_split${i}" \ - -H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngine [...] - http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl --location-trusted \ + -u $USER:$PASSWORD \ + -T "$DATA_DIR/hits_split${i}" \ + -H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEn [...] + http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load + else + curl --location-trusted \ + -u $USER:$PASSWORD \ + -T "$DATA_DIR/hits_split${i}" \ + -H "label:${TXN_ID}_${i}" \ + -H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEn [...] + http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load + fi done } diff --git a/tools/ssb-tools/bin/load-ssb-data.sh b/tools/ssb-tools/bin/load-ssb-data.sh index efd3bf3109f..d30720d7f1e 100755 --- a/tools/ssb-tools/bin/load-ssb-data.sh +++ b/tools/ssb-tools/bin/load-ssb-data.sh @@ -36,11 +36,13 @@ usage() { echo " Usage: $0 <options> Optional options: - -c parallelism to load data of lineorder table, default is 5. + -c parallelism to load data of lineorder table, default is 5. + -x use transaction id. multi times of loading with the same id won't load duplicate data. Eg. $0 load data using default value. - $0 -c 10 load lineorder table data using parallelism 10. + $0 -c 10 load lineorder table data using parallelism 10. + $0 -x blabla use transaction id \"blabla\". " exit 1 } @@ -48,13 +50,14 @@ Usage: $0 <options> OPTS=$(getopt \ -n "$0" \ -o '' \ - -o 'hc:' \ + -o 'hc:x:' \ -- "$@") eval set -- "${OPTS}" PARALLEL=5 HELP=0 +TXN_ID="" if [[ $# == 0 ]]; then usage @@ -70,6 +73,10 @@ while true; do PARALLEL=$2 shift 2 ;; + -x) + TXN_ID=$2 + shift 2 + ;; --) shift break @@ -110,9 +117,13 @@ run_sql() { load_lineitem_flat() { # Loading data in batches by year. + local flat_con_idx=0 for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 'lo_orderdate>=19980101'; do echo -e "\n${con}" - run_sql " + flat_con_idx=$((flat_con_idx + 1)) + + if [[ -z ${TXN_ID} ]]; then + run_sql " INSERT INTO lineorder_flat SELECT LO_ORDERDATE, @@ -182,6 +193,79 @@ ON (s.s_suppkey = l.lo_suppkey) INNER JOIN part p ON (p.p_partkey = l.lo_partkey); " + else + run_sql " +INSERT INTO lineorder_flat +WITH LABEL \`${TXN_ID}_flat_${flat_con_idx}\` +SELECT + LO_ORDERDATE, + LO_ORDERKEY, + LO_LINENUMBER, + LO_CUSTKEY, + LO_PARTKEY, + LO_SUPPKEY, + LO_ORDERPRIORITY, + LO_SHIPPRIORITY, + LO_QUANTITY, + LO_EXTENDEDPRICE, + LO_ORDTOTALPRICE, + LO_DISCOUNT, + LO_REVENUE, + LO_SUPPLYCOST, + LO_TAX, + LO_COMMITDATE, + LO_SHIPMODE, + C_NAME, + C_ADDRESS, + C_CITY, + C_NATION, + C_REGION, + C_PHONE, + C_MKTSEGMENT, + S_NAME, + S_ADDRESS, + S_CITY, + S_NATION, + S_REGION, + S_PHONE, + P_NAME, + P_MFGR, + P_CATEGORY, + P_BRAND, + P_COLOR, + P_TYPE, + P_SIZE, + P_CONTAINER +FROM ( + SELECT + lo_orderkey, + lo_linenumber, + lo_custkey, + lo_partkey, + lo_suppkey, + lo_orderdate, + lo_orderpriority, + lo_shippriority, + lo_quantity, + lo_extendedprice, + lo_ordtotalprice, + lo_discount, + lo_revenue, + lo_supplycost, + lo_tax, + lo_commitdate, + lo_shipmode + FROM lineorder + WHERE ${con} +) l +INNER JOIN customer c +ON (c.c_custkey = l.lo_custkey) +INNER JOIN supplier s +ON (s.s_suppkey = l.lo_suppkey) +INNER JOIN part p +ON (p.p_partkey = l.lo_partkey); +" + fi done } @@ -200,36 +284,73 @@ echo "Start time: $(date)" echo "==========Start to load data into ssb tables==========" echo 'Loading data for table: part' -curl --location-trusted -u "${USER}":"${PASSWORD}" \ - -H "column_separator:|" \ - -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ - -T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load +if [[ -z ${TXN_ID} ]]; then + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "column_separator:|" \ + -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ + -T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load +else + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "label:${TXN_ID}_part" -H "column_separator:|" \ + -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ + -T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load +fi echo 'Loading data for table: date' -curl --location-trusted -u "${USER}":"${PASSWORD}" \ - -H "column_separator:|" \ - -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ - -T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load +if [[ -z ${TXN_ID} ]]; then + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "label:${TXN_ID}_date" -H "column_separator:|" \ + -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ + -T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load +else + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "column_separator:|" \ + -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ + -T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load +fi echo 'Loading data for table: supplier' -curl --location-trusted -u "${USER}":"${PASSWORD}" \ - -H "column_separator:|" \ - -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ - -T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load +if [[ -z ${TXN_ID} ]]; then + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "column_separator:|" \ + -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ + -T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load +else + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "label:${TXN_ID}_supplier" -H "column_separator:|" \ + -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ + -T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load +fi echo 'Loading data for table: customer' -curl --location-trusted -u "${USER}":"${PASSWORD}" \ - -H "column_separator:|" \ - -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ - -T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load +if [[ -z ${TXN_ID} ]]; then + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "column_separator:|" \ + -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ + -T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load +else + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "label:${TXN_ID}_customer" -H "column_separator:|" \ + -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ + -T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load +fi echo "Loading data for table: lineorder, with ${PARALLEL} parallel" function load() { echo "$@" - curl --location-trusted -u "${USER}":"${PASSWORD}" \ - -H "column_separator:|" \ - -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ - -T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load + # shellcheck disable=SC2016,SC2124 + local FILE_ID="${@//*./}" + if [[ -z ${TXN_ID} ]]; then + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "column_separator:|" \ + -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ + -T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load + else + curl --location-trusted -u "${USER}":"${PASSWORD}" \ + -H "label:${TXN_ID}_lineorder_${FILE_ID}" -H "column_separator:|" \ + -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ + -T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load + fi } # set parallelism diff --git a/tools/tpcds-tools/bin/load-tpcds-data.sh b/tools/tpcds-tools/bin/load-tpcds-data.sh index d34021076d6..ab9fb9d3361 100755 --- a/tools/tpcds-tools/bin/load-tpcds-data.sh +++ b/tools/tpcds-tools/bin/load-tpcds-data.sh @@ -35,11 +35,13 @@ usage() { echo " Usage: $0 <options> Optional options: - -c parallelism to load data of lineitem, orders, partsupp, default is 5. + -c parallelism to load data of lineitem, orders, partsupp, default is 5. + -x use transaction id. multi times of loading with the same id won't load duplicate data. Eg. $0 load data using default value. - $0 -c 10 load lineitem, orders, partsupp table data using parallelism 10. + $0 -c 10 load lineitem, orders, partsupp table data using parallelism 10. + $0 -x blabla use transaction id \"blabla\". " exit 1 } @@ -47,13 +49,14 @@ Usage: $0 <options> OPTS=$(getopt \ -n "$0" \ -o '' \ - -o 'hc:' \ + -o 'hc:x:' \ -- "$@") eval set -- "${OPTS}" PARALLEL=5 HELP=0 +TXN_ID="" if [[ $# == 0 ]]; then usage @@ -69,6 +72,10 @@ while true; do PARALLEL=$2 shift 2 ;; + -x) + TXN_ID=$2 + shift 2 + ;; --) shift break @@ -170,13 +177,26 @@ for table_name in ${!table_columns[*]}; do { for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do if ! [[ -f "${file}" ]]; then continue; fi - ret=$(curl \ - --location-trusted \ - -u "${USER}":"${PASSWORD:=}" \ - -H "column_separator:|" \ - -H "columns: ${table_columns[${table_name}]}" \ - -T "${file}" \ - http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null) + FILE_ID=$(echo "${file}" | awk -F'/' '{print $(NF)}' | awk -F'.' '{print $(1)}') + if [[ -z ${TXN_ID} ]]; then + ret=$(curl \ + --location-trusted \ + -u "${USER}":"${PASSWORD:-}" \ + -H "column_separator:|" \ + -H "columns: ${table_columns[${table_name}]}" \ + -T "${file}" \ + http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null) + else + ret=$(curl \ + --location-trusted \ + -u "${USER}":"${PASSWORD:-}" \ + -H "label:${TXN_ID}_${FILE_ID}" \ + -H "column_separator:|" \ + -H "columns: ${table_columns[${table_name}]}" \ + -T "${file}" \ + http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null) + fi + if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then echo "----loaded ${file}" else diff --git a/tools/tpch-tools/bin/load-tpch-data.sh b/tools/tpch-tools/bin/load-tpch-data.sh index c56d2ea71ac..fa21028f2e9 100755 --- a/tools/tpch-tools/bin/load-tpch-data.sh +++ b/tools/tpch-tools/bin/load-tpch-data.sh @@ -36,11 +36,13 @@ usage() { echo " Usage: $0 <options> Optional options: - -c parallelism to load data of lineitem, orders, partsupp, default is 5. + -c parallelism to load data of lineitem, orders, partsupp, default is 5. + -x use transaction id. multi times of loading with the same id won't load duplicate data. Eg. $0 load data using default value. - $0 -c 10 load lineitem, orders, partsupp table data using parallelism 10. + $0 -c 10 load lineitem, orders, partsupp table data using parallelism 10. + $0 -x blabla use transaction id \"blabla\". " exit 1 } @@ -48,13 +50,14 @@ Usage: $0 <options> OPTS=$(getopt \ -n "$0" \ -o '' \ - -o 'hc:' \ + -o 'hc:x:' \ -- "$@") eval set -- "${OPTS}" PARALLEL=5 HELP=0 +TXN_ID="" if [[ $# == 0 ]]; then usage @@ -70,6 +73,10 @@ while true; do PARALLEL=$2 shift 2 ;; + -x) + TXN_ID=$2 + shift 2 + ;; --) shift break @@ -116,51 +123,113 @@ echo "DB: ${DB}" function load_region() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: r_regionkey, r_name, r_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: r_regionkey, r_name, r_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_region" \ + -H "columns: r_regionkey, r_name, r_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load + fi } function load_nation() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_nation" \ + -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load + fi } function load_supplier() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_supplier" \ + -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load + fi } function load_customer() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_customer" \ + -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load + fi } function load_part() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_part" \ + -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load + fi } function load_partsupp() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load + # shellcheck disable=SC2016,SC2124 + local FILE_ID="${@//*./}" + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_partsupp_${FILE_ID}" \ + -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load + fi } function load_orders() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load + # shellcheck disable=SC2016,SC2124 + local FILE_ID="${@//*./}" + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_orders_${FILE_ID}" \ + -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load + fi } function load_lineitem() { echo "$*" - curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ - -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ - -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load + # shellcheck disable=SC2016,SC2124 + local FILE_ID="${@//*./}" + if [[ -z ${TXN_ID} ]]; then + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load + else + curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \ + -H "label:${TXN_ID}_lineitem_${FILE_ID}" \ + -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ + -T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load + fi } # start load --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org