This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a357426257 [Enhancement](tools) Support transaction for benchmarks 
loading (#31126)
7a357426257 is described below

commit 7a3574262573a9ce202429b32462a539150bd81c
Author: zclllyybb <zhaochan...@selectdb.com>
AuthorDate: Tue Feb 20 14:29:34 2024 +0800

    [Enhancement](tools) Support transaction for benchmarks loading (#31126)
---
 tools/clickbench-tools/load-clickbench-data.sh |  37 ++++--
 tools/ssb-tools/bin/load-ssb-data.sh           | 169 +++++++++++++++++++++----
 tools/tpcds-tools/bin/load-tpcds-data.sh       |  40 ++++--
 tools/tpch-tools/bin/load-tpch-data.sh         | 123 ++++++++++++++----
 4 files changed, 299 insertions(+), 70 deletions(-)

diff --git a/tools/clickbench-tools/load-clickbench-data.sh 
b/tools/clickbench-tools/load-clickbench-data.sh
index aff122edff4..41d17a8a76e 100755
--- a/tools/clickbench-tools/load-clickbench-data.sh
+++ b/tools/clickbench-tools/load-clickbench-data.sh
@@ -30,13 +30,18 @@ ROOT=$(
 
 CURDIR=${ROOT}
 DATA_DIR=$CURDIR/
-# DATA_DIR=/mnt/disk1/stephen/data/clickbench
 
 usage() {
     echo "
 This script is used to load ClickBench data, 
 will use mysql client to connect Doris server which is specified in 
conf/doris-cluster.conf file.
-Usage: $0 
+Usage: $0 <options>
+  Optional options:
+    -x              use transaction id. multi times of loading with the same 
id won't load duplicate data.
+
+  Eg.
+    $0              load data using default value.
+    $0 -x blabla    use transaction id \"blabla\".
   "
     exit 1
 }
@@ -44,11 +49,13 @@ Usage: $0
 OPTS=$(getopt \
     -n $0 \
     -o '' \
-    -o 'h' \
+    -o 'hx:' \
     -- "$@")
 eval set -- "$OPTS"
 
 HELP=0
+TXN_ID=""
+
 while true; do
     case "$1" in
     -h)
@@ -59,6 +66,10 @@ while true; do
         shift
         break
         ;;
+    -x)
+        TXN_ID=$2
+        shift 2
+        ;;
     *)
         echo "Internal error"
         exit 1
@@ -113,7 +124,6 @@ function load() {
         if [ ! -f "$DATA_DIR/hits_split${i}" ]; then
             echo "will download hits_split${i} to $DATA_DIR"
             wget --continue 
"https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench/hits_split${i}";
 &
-            # wget --continue 
"https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench/hits_split${i}";
 &
             PID=$!
             wget_pids[${#wget_pids[@]}]=$PID
         fi
@@ -127,11 +137,20 @@ function load() {
     for i in $(seq 0 9); do
         echo -e "
         start loading hits_split${i}"
-        curl --location-trusted \
-            -u $USER:$PASSWORD \
-            -T "$DATA_DIR/hits_split${i}" \
-            -H 
"columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngine
 [...]
-            http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
+        if [[ -z ${TXN_ID} ]]; then
+            curl --location-trusted \
+                -u $USER:$PASSWORD \
+                -T "$DATA_DIR/hits_split${i}" \
+                -H 
"columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEn
 [...]
+                http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
+        else
+            curl --location-trusted \
+                -u $USER:$PASSWORD \
+                -T "$DATA_DIR/hits_split${i}" \
+                -H "label:${TXN_ID}_${i}" \
+                -H 
"columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEn
 [...]
+                http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
+        fi
     done
 }
 
diff --git a/tools/ssb-tools/bin/load-ssb-data.sh 
b/tools/ssb-tools/bin/load-ssb-data.sh
index efd3bf3109f..d30720d7f1e 100755
--- a/tools/ssb-tools/bin/load-ssb-data.sh
+++ b/tools/ssb-tools/bin/load-ssb-data.sh
@@ -36,11 +36,13 @@ usage() {
     echo "
 Usage: $0 <options>
   Optional options:
-    -c             parallelism to load data of lineorder table, default is 5.
+    -c              parallelism to load data of lineorder table, default is 5.
+    -x              use transaction id. multi times of loading with the same 
id won't load duplicate data.
 
   Eg.
     $0              load data using default value.
-    $0 -c 10        load lineorder table data using parallelism 10.     
+    $0 -c 10        load lineorder table data using parallelism 10.
+    $0 -x blabla    use transaction id \"blabla\".
   "
     exit 1
 }
@@ -48,13 +50,14 @@ Usage: $0 <options>
 OPTS=$(getopt \
     -n "$0" \
     -o '' \
-    -o 'hc:' \
+    -o 'hc:x:' \
     -- "$@")
 
 eval set -- "${OPTS}"
 
 PARALLEL=5
 HELP=0
+TXN_ID=""
 
 if [[ $# == 0 ]]; then
     usage
@@ -70,6 +73,10 @@ while true; do
         PARALLEL=$2
         shift 2
         ;;
+    -x)
+        TXN_ID=$2
+        shift 2
+        ;;
     --)
         shift
         break
@@ -110,9 +117,13 @@ run_sql() {
 
 load_lineitem_flat() {
     # Loading data in batches by year.
+    local flat_con_idx=0
     for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and 
lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 
'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and 
lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 
'lo_orderdate>=19980101'; do
         echo -e "\n${con}"
-        run_sql "
+        flat_con_idx=$((flat_con_idx + 1))
+
+        if [[ -z ${TXN_ID} ]]; then
+            run_sql "
 INSERT INTO lineorder_flat
 SELECT
     LO_ORDERDATE,
@@ -182,6 +193,79 @@ ON (s.s_suppkey = l.lo_suppkey)
 INNER JOIN part p
 ON (p.p_partkey = l.lo_partkey);
 "
+        else
+            run_sql "
+INSERT INTO lineorder_flat
+WITH LABEL \`${TXN_ID}_flat_${flat_con_idx}\`
+SELECT
+    LO_ORDERDATE,
+    LO_ORDERKEY,
+    LO_LINENUMBER,
+    LO_CUSTKEY,
+    LO_PARTKEY,
+    LO_SUPPKEY,
+    LO_ORDERPRIORITY,
+    LO_SHIPPRIORITY,
+    LO_QUANTITY,
+    LO_EXTENDEDPRICE,
+    LO_ORDTOTALPRICE,
+    LO_DISCOUNT,
+    LO_REVENUE,
+    LO_SUPPLYCOST,
+    LO_TAX,
+    LO_COMMITDATE,
+    LO_SHIPMODE,
+    C_NAME,
+    C_ADDRESS,
+    C_CITY,
+    C_NATION,
+    C_REGION,
+    C_PHONE,
+    C_MKTSEGMENT,
+    S_NAME,
+    S_ADDRESS,
+    S_CITY,
+    S_NATION,
+    S_REGION,
+    S_PHONE,
+    P_NAME,
+    P_MFGR,
+    P_CATEGORY,
+    P_BRAND,
+    P_COLOR,
+    P_TYPE,
+    P_SIZE,
+    P_CONTAINER
+FROM (
+    SELECT
+        lo_orderkey,
+        lo_linenumber,
+        lo_custkey,
+        lo_partkey,
+        lo_suppkey,
+        lo_orderdate,
+        lo_orderpriority,
+        lo_shippriority,
+        lo_quantity,
+        lo_extendedprice,
+        lo_ordtotalprice,
+        lo_discount,
+        lo_revenue,
+        lo_supplycost,
+        lo_tax,
+        lo_commitdate,
+        lo_shipmode
+    FROM lineorder
+    WHERE ${con}
+) l
+INNER JOIN customer c
+ON (c.c_custkey = l.lo_custkey)
+INNER JOIN supplier s
+ON (s.s_suppkey = l.lo_suppkey)
+INNER JOIN part p
+ON (p.p_partkey = l.lo_partkey);
+"
+        fi
     done
 }
 
@@ -200,36 +284,73 @@ echo "Start time: $(date)"
 echo "==========Start to load data into ssb tables=========="
 
 echo 'Loading data for table: part'
-curl --location-trusted -u "${USER}":"${PASSWORD}" \
-    -H "column_separator:|" \
-    -H 
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
 \
-    -T "${SSB_DATA_DIR}"/part.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+if [[ -z ${TXN_ID} ]]; then
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "column_separator:|" \
+        -H 
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
 \
+        -T "${SSB_DATA_DIR}"/part.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+else
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "label:${TXN_ID}_part" -H "column_separator:|" \
+        -H 
"columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy"
 \
+        -T "${SSB_DATA_DIR}"/part.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+fi
 
 echo 'Loading data for table: date'
-curl --location-trusted -u "${USER}":"${PASSWORD}" \
-    -H "column_separator:|" \
-    -H 
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
 \
-    -T "${SSB_DATA_DIR}"/date.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
+if [[ -z ${TXN_ID} ]]; then
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "column_separator:|" \
+        -H 
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
 \
+        -T "${SSB_DATA_DIR}"/date.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
+else
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "label:${TXN_ID}_date" -H "column_separator:|" \
+        -H 
"columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy"
 \
+        -T "${SSB_DATA_DIR}"/date.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
+fi
 
 echo 'Loading data for table: supplier'
-curl --location-trusted -u "${USER}":"${PASSWORD}" \
-    -H "column_separator:|" \
-    -H 
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
-    -T "${SSB_DATA_DIR}"/supplier.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+if [[ -z ${TXN_ID} ]]; then
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "column_separator:|" \
+        -H 
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
+        -T "${SSB_DATA_DIR}"/supplier.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+else
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "label:${TXN_ID}_supplier" -H "column_separator:|" \
+        -H 
"columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
+        -T "${SSB_DATA_DIR}"/supplier.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+fi
 
 echo 'Loading data for table: customer'
-curl --location-trusted -u "${USER}":"${PASSWORD}" \
-    -H "column_separator:|" \
-    -H 
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
 \
-    -T "${SSB_DATA_DIR}"/customer.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+if [[ -z ${TXN_ID} ]]; then
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "column_separator:|" \
+        -H 
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
 \
+        -T "${SSB_DATA_DIR}"/customer.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+else
+    curl --location-trusted -u "${USER}":"${PASSWORD}" \
+        -H "label:${TXN_ID}_customer" -H "column_separator:|" \
+        -H 
"columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use"
 \
+        -T "${SSB_DATA_DIR}"/customer.tbl 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+fi
 
 echo "Loading data for table: lineorder, with ${PARALLEL} parallel"
 function load() {
     echo "$@"
-    curl --location-trusted -u "${USER}":"${PASSWORD}" \
-        -H "column_separator:|" \
-        -H 
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
 \
-        -T "$@" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
+    # shellcheck disable=SC2016,SC2124
+    local FILE_ID="${@//*./}"
+    if [[ -z ${TXN_ID} ]]; then
+        curl --location-trusted -u "${USER}":"${PASSWORD}" \
+            -H "column_separator:|" \
+            -H 
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
 \
+            -T "$@" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
+    else
+        curl --location-trusted -u "${USER}":"${PASSWORD}" \
+            -H "label:${TXN_ID}_lineorder_${FILE_ID}" -H "column_separator:|" \
+            -H 
"columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy"
 \
+            -T "$@" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
+    fi
 }
 
 # set parallelism
diff --git a/tools/tpcds-tools/bin/load-tpcds-data.sh 
b/tools/tpcds-tools/bin/load-tpcds-data.sh
index d34021076d6..ab9fb9d3361 100755
--- a/tools/tpcds-tools/bin/load-tpcds-data.sh
+++ b/tools/tpcds-tools/bin/load-tpcds-data.sh
@@ -35,11 +35,13 @@ usage() {
     echo "
 Usage: $0 <options>
   Optional options:
-     -c             parallelism to load data of lineitem, orders, partsupp, 
default is 5.
+    -c             parallelism to load data of lineitem, orders, partsupp, 
default is 5.
+    -x              use transaction id. multi times of loading with the same 
id won't load duplicate data.
 
   Eg.
     $0              load data using default value.
-    $0 -c 10        load lineitem, orders, partsupp table data using 
parallelism 10.     
+    $0 -c 10        load lineitem, orders, partsupp table data using 
parallelism 10.
+    $0 -x blabla    use transaction id \"blabla\".
   "
     exit 1
 }
@@ -47,13 +49,14 @@ Usage: $0 <options>
 OPTS=$(getopt \
     -n "$0" \
     -o '' \
-    -o 'hc:' \
+    -o 'hc:x:' \
     -- "$@")
 
 eval set -- "${OPTS}"
 
 PARALLEL=5
 HELP=0
+TXN_ID=""
 
 if [[ $# == 0 ]]; then
     usage
@@ -69,6 +72,10 @@ while true; do
         PARALLEL=$2
         shift 2
         ;;
+    -x)
+        TXN_ID=$2
+        shift 2
+        ;;
     --)
         shift
         break
@@ -170,13 +177,26 @@ for table_name in ${!table_columns[*]}; do
     {
         for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do
             if ! [[ -f "${file}" ]]; then continue; fi
-            ret=$(curl \
-                --location-trusted \
-                -u "${USER}":"${PASSWORD:=}" \
-                -H "column_separator:|" \
-                -H "columns: ${table_columns[${table_name}]}" \
-                -T "${file}" \
-                
http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load
 2>/dev/null)
+            FILE_ID=$(echo "${file}" | awk -F'/' '{print $(NF)}' | awk -F'.' 
'{print $(1)}')
+            if [[ -z ${TXN_ID} ]]; then
+                ret=$(curl \
+                    --location-trusted \
+                    -u "${USER}":"${PASSWORD:-}" \
+                    -H "column_separator:|" \
+                    -H "columns: ${table_columns[${table_name}]}" \
+                    -T "${file}" \
+                    
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load
 2>/dev/null)
+            else
+                ret=$(curl \
+                    --location-trusted \
+                    -u "${USER}":"${PASSWORD:-}" \
+                    -H "label:${TXN_ID}_${FILE_ID}" \
+                    -H "column_separator:|" \
+                    -H "columns: ${table_columns[${table_name}]}" \
+                    -T "${file}" \
+                    
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load
 2>/dev/null)
+            fi
+
             if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
                 echo "----loaded ${file}"
             else
diff --git a/tools/tpch-tools/bin/load-tpch-data.sh 
b/tools/tpch-tools/bin/load-tpch-data.sh
index c56d2ea71ac..fa21028f2e9 100755
--- a/tools/tpch-tools/bin/load-tpch-data.sh
+++ b/tools/tpch-tools/bin/load-tpch-data.sh
@@ -36,11 +36,13 @@ usage() {
     echo "
 Usage: $0 <options>
   Optional options:
-     -c             parallelism to load data of lineitem, orders, partsupp, 
default is 5.
+    -c             parallelism to load data of lineitem, orders, partsupp, 
default is 5.
+    -x              use transaction id. multi times of loading with the same 
id won't load duplicate data.
 
   Eg.
     $0              load data using default value.
-    $0 -c 10        load lineitem, orders, partsupp table data using 
parallelism 10.     
+    $0 -c 10        load lineitem, orders, partsupp table data using 
parallelism 10.
+    $0 -x blabla    use transaction id \"blabla\".
   "
     exit 1
 }
@@ -48,13 +50,14 @@ Usage: $0 <options>
 OPTS=$(getopt \
     -n "$0" \
     -o '' \
-    -o 'hc:' \
+    -o 'hc:x:' \
     -- "$@")
 
 eval set -- "${OPTS}"
 
 PARALLEL=5
 HELP=0
+TXN_ID=""
 
 if [[ $# == 0 ]]; then
     usage
@@ -70,6 +73,10 @@ while true; do
         PARALLEL=$2
         shift 2
         ;;
+    -x)
+        TXN_ID=$2
+        shift 2
+        ;;
     --)
         shift
         break
@@ -116,51 +123,113 @@ echo "DB: ${DB}"
 
 function load_region() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: r_regionkey, r_name, r_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: r_regionkey, r_name, r_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_region" \
+            -H "columns: r_regionkey, r_name, r_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
+    fi
 }
 function load_nation() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_nation" \
+            -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
+    fi
 }
 function load_supplier() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, 
s_acctbal, s_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, 
s_acctbal, s_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_supplier" \
+            -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, 
s_acctbal, s_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
+    fi
 }
 function load_customer() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, 
c_acctbal, c_mktsegment, c_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, 
c_acctbal, c_mktsegment, c_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_customer" \
+            -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, 
c_acctbal, c_mktsegment, c_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
+    fi
 }
 function load_part() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, 
p_container, p_retailprice, p_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, 
p_container, p_retailprice, p_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_part" \
+            -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, 
p_container, p_retailprice, p_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
+    fi
 }
 function load_partsupp() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, 
ps_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
+    # shellcheck disable=SC2016,SC2124
+    local FILE_ID="${@//*./}"
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, 
ps_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_partsupp_${FILE_ID}" \
+            -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, 
ps_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
+    fi
 }
 function load_orders() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, 
o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
+    # shellcheck disable=SC2016,SC2124
+    local FILE_ID="${@//*./}"
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, 
o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_orders_${FILE_ID}" \
+            -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, 
o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
+    fi
 }
 function load_lineitem() {
     echo "$*"
-    curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
-        -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, 
l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, 
l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" 
\
-        -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
+    # shellcheck disable=SC2016,SC2124
+    local FILE_ID="${@//*./}"
+    if [[ -z ${TXN_ID} ]]; then
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, 
l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, 
l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" 
\
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
+    else
+        curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H 
"column_separator:|" \
+            -H "label:${TXN_ID}_lineitem_${FILE_ID}" \
+            -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, 
l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, 
l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" 
\
+            -T "$*" 
http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
+    fi
 }
 
 # start load


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to