This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new c8316d14a0 [cherry-pick][branch-2.0](load) support line delimiter for old broker load in 2.0 (#22225) c8316d14a0 is described below commit c8316d14a0da50d95ee25659a1e7d99fac74232c Author: Siyang Tang <82279870+tangsiyang2...@users.noreply.github.com> AuthorDate: Thu Jul 27 09:48:45 2023 +0800 [cherry-pick][branch-2.0](load) support line delimiter for old broker load in 2.0 (#22225) --- .../Load/BROKER-LOAD.md | 5 + .../Load/BROKER-LOAD.md | 5 + fe/fe-core/src/main/cup/sql_parser.cup | 3 +- .../org/apache/doris/analysis/DataDescription.java | 26 +++ .../org/apache/doris/analysis/S3TvfLoadStmt.java | 23 +-- .../ddl/csv_s3_case_line_delimiter_create.sql | 25 +++ .../ddl/csv_s3_case_line_delimiter_drop.sql | 1 + .../load_p2/broker_load/test_broker_load.groovy | 180 +++++++++++---------- .../broker_load/test_tvf_based_broker_load.groovy | 133 +++------------ 9 files changed, 200 insertions(+), 201 deletions(-) diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 50b859113a..59956abc76 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -65,6 +65,7 @@ WITH BROKER broker_name INTO TABLE `table_name` [PARTITION (p1, p2, ...)] [COLUMNS TERMINATED BY "column_separator"] + [LINES TERMINATED BY "line_delimiter"] [FORMAT AS "file_type"] [(column_list)] [COLUMNS FROM PATH AS (c1, c2, ...)] @@ -96,6 +97,10 @@ WITH BROKER broker_name Specifies the column separator. Only valid in CSV format. Only single-byte delimiters can be specified. + - `LINES TERMINATED BY` + + Specifies the line delimiter. Only valid in CSV format. Only single-byte delimiters can be specified. + - `FORMAT AS` Specifies the file type, CSV, PARQUET and ORC formats are supported. Default is CSV. diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 73387d12da..eb294dcacf 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -66,6 +66,7 @@ WITH BROKER broker_name [PARTITION (p1, p2, ...)] [COLUMNS TERMINATED BY "column_separator"] [FORMAT AS "file_type"] + [LINES TERMINATED BY "line_delimiter"] [(column_list)] [COLUMNS FROM PATH AS (c1, c2, ...)] [SET (column_mapping)] @@ -96,6 +97,10 @@ WITH BROKER broker_name 指定列分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。 + - `LINES TERMINATED BY` + + 指定行分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。 + - `FORMAT AS` 指定文件类型,支持 CSV、PARQUET 和 ORC 格式。默认为 CSV。 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 86d2dc97f7..a15bbc5a3c 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -2398,6 +2398,7 @@ data_desc ::= KW_INTO KW_TABLE ident:tableName opt_partition_names:partitionNames opt_field_term:colSep + opt_line_term:lineDelimiter opt_file_format:fileFormat opt_col_list:colList opt_columns_from_path:columnsFromPath @@ -2408,7 +2409,7 @@ data_desc ::= sequence_col_clause:sequenceColName opt_properties:properties {: - RESULT = new DataDescription(tableName, partitionNames, files, colList, colSep, fileFormat, + RESULT = new DataDescription(tableName, partitionNames, files, colList, colSep, lineDelimiter, fileFormat, columnsFromPath, isNeg, colMappingList, preFilterExpr, whereExpr, mergeType, deleteExpr, sequenceColName, properties); :} | opt_merge_type:mergeType KW_DATA KW_FROM KW_TABLE ident:srcTableName diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java index 658605abc5..88044394fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java @@ -168,11 +168,32 @@ public class DataDescription implements InsertStmt.DataDesc { isNegative, columnMappingList, null, null, LoadTask.MergeType.APPEND, null, null, null); } + public DataDescription(String tableName, + PartitionNames partitionNames, + List<String> filePaths, + List<String> columns, + Separator columnSeparator, + String fileFormat, + List<String> columnsFromPath, + boolean isNegative, + List<Expr> columnMappingList, + Expr fileFilterExpr, + Expr whereExpr, + LoadTask.MergeType mergeType, + Expr deleteCondition, + String sequenceColName, + Map<String, String> properties) { + this(tableName, partitionNames, filePaths, columns, columnSeparator, null, + fileFormat, columnsFromPath, isNegative, columnMappingList, fileFilterExpr, whereExpr, + mergeType, deleteCondition, sequenceColName, properties); + } + public DataDescription(String tableName, PartitionNames partitionNames, List<String> filePaths, List<String> columns, Separator columnSeparator, + Separator lineDelimiter, String fileFormat, List<String> columnsFromPath, boolean isNegative, @@ -188,6 +209,7 @@ public class DataDescription implements InsertStmt.DataDesc { this.filePaths = filePaths; this.fileFieldNames = columns; this.columnSeparator = columnSeparator; + this.lineDelimiter = lineDelimiter; this.fileFormat = fileFormat; this.columnsFromPath = columnsFromPath; this.isNegative = isNegative; @@ -597,6 +619,10 @@ public class DataDescription implements InsertStmt.DataDesc { return lineDelimiter.getSeparator(); } + public Separator getLineDelimiterObj() { + return lineDelimiter; + } + public void setLineDelimiter(Separator lineDelimiter) { this.lineDelimiter = lineDelimiter; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java index 7c5eec7729..99b23fcc61 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java @@ -155,15 +155,8 @@ public class S3TvfLoadStmt extends NativeInsertStmt { final String format = Optional.ofNullable(dataDescription.getFileFormat()).orElse(DEFAULT_FORMAT); params.put(ExternalFileTableValuedFunction.FORMAT, format); if (isCsvFormat(format)) { - final Separator separator = dataDescription.getColumnSeparatorObj(); - if (separator != null) { - try { - separator.analyze(); - } catch (AnalysisException e) { - throw new DdlException("failed to create s3 tvf ref", e); - } - params.put(ExternalFileTableValuedFunction.COLUMN_SEPARATOR, dataDescription.getColumnSeparator()); - } + parseSeparator(dataDescription.getColumnSeparatorObj(), params); + parseSeparator(dataDescription.getLineDelimiterObj(), params); } Preconditions.checkState(!brokerDesc.isMultiLoadBroker(), "do not support multi broker load currently"); @@ -181,6 +174,18 @@ public class S3TvfLoadStmt extends NativeInsertStmt { } } + private static void parseSeparator(Separator separator, Map<String, String> tvfParams) throws DdlException { + if (separator == null) { + return; + } + try { + separator.analyze(); + } catch (AnalysisException e) { + throw new DdlException(String.format("failed to parse separator:%s", separator), e); + } + tvfParams.put(ExternalFileTableValuedFunction.COLUMN_SEPARATOR, separator.getSeparator()); + } + private static boolean isCsvFormat(String format) { return Strings.isNullOrEmpty(format) || StringUtils.equalsIgnoreCase(format, FORMAT_CSV); } diff --git a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql new file mode 100644 index 0000000000..12d9fd92fc --- /dev/null +++ b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql @@ -0,0 +1,25 @@ +CREATE TABLE csv_s3_case_line_delimiter ( + l_shipdate DATE NOT NULL, + l_orderkey bigint NOT NULL, + l_linenumber int not null, + l_partkey int NOT NULL, + l_suppkey int not null, + l_quantity decimal(15, 2) NOT NULL, + l_extendedprice decimal(15, 2) NOT NULL, + l_discount decimal(15, 2) NOT NULL, + l_tax decimal(15, 2) NOT NULL, + l_returnflag VARCHAR(1) NOT NULL, + l_linestatus VARCHAR(1) NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR(25) NOT NULL, + l_shipmode VARCHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`l_shipdate`, `l_orderkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); \ No newline at end of file diff --git a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql new file mode 100644 index 0000000000..f3ab15d05e --- /dev/null +++ b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS csv_s3_case_line_delimiter \ No newline at end of file diff --git a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy index d9b4cfea37..fb29639908 100644 --- a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy +++ b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy @@ -48,7 +48,8 @@ suite("test_broker_load_p2", "p2") { "orc_s3_case7", // table column uppercase * load column lowercase * orc file lowercase "orc_s3_case8", // table column uppercase * load column uppercase * orc file uppercase "orc_s3_case9", // table column uppercase * load column lowercase * orc file uppercase - ] + "csv_s3_case_line_delimiter" // csv format table with special line delimiter + ] def paths = ["s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", @@ -80,51 +81,53 @@ suite("test_broker_load_p2", "p2") { "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", + "s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz" ] def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_size""", - """p_partkey""", - """p_partkey""", - """p_partkey, p_size""", - """p_partkey""", - """p_partkey, p_size""", - """p_partkey, p_size""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, col1""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, x1""", - """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - """col1, col2, col3, col4""", - """p_partkey, p_name, p_mfgr, x1""", - """p_partkey, p_name, p_mfgr, p_brand""", - """p_partkey, p_name, p_mfgr, p_brand""", - """p_name, p_mfgr""", - """""", - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - //TODO: comment blow 8 rows after jibing fix - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid [...] - //TODO: uncomment blow 8 rows after jibing fix - // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin [...] - // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN [...] - // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin [...] - // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN [...] - // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN [...] - // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin [...] - // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN [...] - // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin [...] - ] - def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_size""", + """p_partkey""", + """p_partkey""", + """p_partkey, p_size""", + """p_partkey""", + """p_partkey, p_size""", + """p_partkey, p_size""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, col1""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, x1""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """col1, col2, col3, col4""", + """p_partkey, p_name, p_mfgr, x1""", + """p_partkey, p_name, p_mfgr, p_brand""", + """p_partkey, p_name, p_mfgr, p_brand""", + """p_name, p_mfgr""", + """""", + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + //TODO: comment blow 8 rows after jibing fix + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """""" + //TODO: uncomment blow 8 rows after jibing fix + // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] + // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] + // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] + // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] + // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] + // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] + // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] + // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] + ] + def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] def set_values = ["", "", "SET(comment=p_comment, retailprice=p_retailprice, container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr, name=p_name, partkey=p_partkey)", @@ -155,9 +158,12 @@ suite("test_broker_load_p2", "p2") { "", "", "", + "", "" ] - def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + + def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "\u0007"] def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", @@ -190,7 +196,8 @@ suite("test_broker_load_p2", "p2") { "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - ] + "\\N" + ] def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", @@ -223,50 +230,59 @@ suite("test_broker_load_p2", "p2") { "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", + "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0" ] def error_msg = ["", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ] + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "[INTERNAL_ERROR]failed to find default value expr for slot: x1", + "", + "", + "[INTERNAL_ERROR]failed to find default value expr for slot: x1", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ] String ak = getS3AK() String sk = getS3SK() String enabled = context.config.otherConfigs.get("enableBrokerLoad") def do_load_job = {uuid, path, table, columns, column_in_path, preceding_filter, - set_value, where_expr -> - String columns_str = ("$columns" != "") ? "($columns)" : ""; - String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET" + set_value, where_expr, line_delimiter -> + String columns_str = ("$columns" != "") ? "($columns)" : ""; + String format_str + if (table.startsWith("orc_s3_case")) { + format_str = "ORC" + } else if (table.startsWith("csv")) { + format_str = "CSV" + } else { + format_str = "PARQUET" + } sql """ - LOAD LABEL $uuid ( + LOAD LABEL $uuid ( DATA INFILE("$path") INTO TABLE $table FORMAT AS $format_str @@ -275,6 +291,7 @@ suite("test_broker_load_p2", "p2") { $preceding_filter $set_value $where_expr + $line_delimiter ) WITH S3 ( "AWS_ACCESS_KEY" = "$ak", @@ -300,7 +317,7 @@ suite("test_broker_load_p2", "p2") { def uuid = UUID.randomUUID().toString().replace("-", "0") uuids.add(uuid) do_load_job.call(uuid, paths[i], table, columns_list[i], column_in_paths[i], preceding_filters[i], - set_values[i], where_exprs[i]) + set_values[i], where_exprs[i], line_delimiters[i]) i++ } @@ -351,4 +368,3 @@ suite("test_broker_load_p2", "p2") { } } } - diff --git a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy index 964e2c926a..665730d3d5 100644 --- a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy +++ b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy @@ -29,7 +29,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "set7", "null_default", "filter", - // "path_column", + "path_column", "parquet_s3_case1", // col1 not in file but in table, will load default value for it. "parquet_s3_case2", // x1 not in file, not in table, will throw "col not found" error. "parquet_s3_case3", // p_comment not in table but in file, load normally. @@ -48,6 +48,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "orc_s3_case7", // table column uppercase * load column lowercase * orc file lowercase "orc_s3_case8", // table column uppercase * load column uppercase * orc file uppercase "orc_s3_case9", // table column uppercase * load column lowercase * orc file uppercase + "csv_s3_case_line_delimiter" // csv format table with special line delimiter ] def paths = ["s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", @@ -61,7 +62,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", - // "s3://doris-build-1308700295/regression/load/data/path/*/part*", + "s3://doris-build-1308700295/regression/load/data/path/*/part*", "s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", @@ -80,6 +81,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", + "s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz" ] def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", @@ -93,7 +95,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { """p_partkey, p_size""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", - // """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", + """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, col1""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, x1""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", @@ -113,6 +115,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng [...] + """""" //TODO: uncomment blow 8 rows after jibing fix // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] @@ -123,8 +126,8 @@ suite("test_tvf_based_broker_load_p2", "p2") { // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE, [...] // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase, [...] ] - def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", ""/*, "COLUMNS FROM PATH AS (city)"*/, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10"/*, ""*/, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] def set_values = ["", "", "SET(comment=p_comment, retailprice=p_retailprice, container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr, name=p_name, partkey=p_partkey)", @@ -137,7 +140,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "set(partsize = p_partkey + p_size)", "", "", - // "", + "", "", "", "", @@ -155,116 +158,27 @@ suite("test_tvf_based_broker_load_p2", "p2") { "", "", "", + "", "" ] - def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10"/*, ""*/, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - - def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=163706; dpp.abnorm.ALL=0; dpp.norm.ALL=36294", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "\\N", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "\\N", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=4096", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=100000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - ] - - def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - ] - - def error_msg = ["", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ] + def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",""] + def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "\u0007"] String ak = getS3AK() String sk = getS3SK() String enabled = context.config.otherConfigs.get("enableBrokerLoad") def do_load_job = { uuid, path, table, columns, column_in_path, preceding_filter, - set_value, where_expr -> + set_value, where_expr, line_delimiter -> String columns_str = ("$columns" != "") ? "($columns)" : ""; - String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET" + String format_str + if (table.startsWith("orc_s3_case")) { + format_str = "ORC" + } else if (table.startsWith("csv")) { + format_str = "CSV" + } else { + format_str = "PARQUET" + } sql """ LOAD LABEL $uuid ( DATA INFILE("$path") @@ -275,6 +189,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { $preceding_filter $set_value $where_expr + $line_delimiter ) WITH S3 ( "AWS_ACCESS_KEY" = "$ak", @@ -299,7 +214,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { def uuid = UUID.randomUUID().toString().replace("-", "0") uuids.add(uuid) do_load_job.call(uuid, paths[i], table, columns_list[i], column_in_paths[i], preceding_filters[i], - set_values[i], where_exprs[i]) + set_values[i], where_exprs[i], line_delimiters[i]) i++ } @@ -325,4 +240,4 @@ suite("test_tvf_based_broker_load_p2", "p2") { sql """ set enable_unified_load=false; """ } } -} +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org