This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6801e069751 [fix](broker-load) fix error in broker-load with read_json_by_line (#41788) 6801e069751 is described below commit 6801e0697517519ed8fc725657542ba8e141d0bb Author: Uniqueyou <134280716+wyxxx...@users.noreply.github.com> AuthorDate: Fri Oct 18 00:00:21 2024 +0800 [fix](broker-load) fix error in broker-load with read_json_by_line (#41788) --- .../org/apache/doris/load/BrokerFileGroup.java | 19 ++++++------- .../broker_load/test_s3_load_properties.out | 18 ++++++++++++ .../broker_load/test_s3_load_properties.groovy | 32 ++++++++++++++++++++++ 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/BrokerFileGroup.java b/fe/fe-core/src/main/java/org/apache/doris/load/BrokerFileGroup.java index 366983d4139..8605dc59bcc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/BrokerFileGroup.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/BrokerFileGroup.java @@ -184,6 +184,7 @@ public class BrokerFileGroup implements Writable { if (lineDelimiter == null) { lineDelimiter = "\n"; } + enclose = dataDescription.getEnclose(); escape = dataDescription.getEscape(); @@ -226,16 +227,14 @@ public class BrokerFileGroup implements Writable { srcTableId = srcTable.getId(); isLoadFromTable = true; } - if (fileFormat != null && fileFormat.equalsIgnoreCase("json")) { - stripOuterArray = dataDescription.isStripOuterArray(); - jsonPaths = dataDescription.getJsonPaths(); - jsonRoot = dataDescription.getJsonRoot(); - fuzzyParse = dataDescription.isFuzzyParse(); - // ATTN: for broker load, we only support reading json format data line by line, - // so if this is set to false, it must be stream load. 
- readJsonByLine = dataDescription.isReadJsonByLine(); - numAsString = dataDescription.isNumAsString(); - } + stripOuterArray = dataDescription.isStripOuterArray(); + jsonPaths = dataDescription.getJsonPaths(); + jsonRoot = dataDescription.getJsonRoot(); + fuzzyParse = dataDescription.isFuzzyParse(); + // ATTN: for broker load, we only support reading json format data line by line, + // so if this is set to false, it must be stream load. + readJsonByLine = dataDescription.isReadJsonByLine(); + numAsString = dataDescription.isNumAsString(); trimDoubleQuotes = dataDescription.getTrimDoubleQuotes(); skipLines = dataDescription.getSkipLines(); } diff --git a/regression-test/data/load_p2/broker_load/test_s3_load_properties.out b/regression-test/data/load_p2/broker_load/test_s3_load_properties.out index 022c08cf8f3..8608a3107fe 100644 --- a/regression-test/data/load_p2/broker_load/test_s3_load_properties.out +++ b/regression-test/data/load_p2/broker_load/test_s3_load_properties.out @@ -380,6 +380,24 @@ -- !select -- 280 +-- !select -- +48 + +-- !select -- +48 + +-- !select -- +47 + +-- !select -- +47 + +-- !select -- +48 + +-- !select -- +216 + -- !select -- 18 diff --git a/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy b/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy index fbb0df3b002..59ad8e67dff 100644 --- a/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy +++ b/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy @@ -418,6 +418,38 @@ suite("test_s3_load_properties", "p2") { "", "", "", "", "")) } + for (String table : basicTables) { + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.json", + "${table}", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", + "", "", "", "", "PROPERTIES(\"strip_outer_array\" = \"true\", \"fuzzy_parse\" = \"true\")")) + } + + attributesList.add(new 
LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.json", + "agg_tbl_basic", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", + "", "", "SET (k19=to_bitmap(k04),k20=HLL_HASH(k04),k21=TO_QUANTILE_STATE(k04,1.0),kd19=to_bitmap(k05),kd20=HLL_HASH(k05),kd21=TO_QUANTILE_STATE(k05,1.0))", "", "PROPERTIES(\"strip_outer_array\" = \"true\", \"fuzzy_parse\" = \"true\")")) + + for (String table : arrayTables) { + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_array_data.json", + "${table}", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17)", + "", "", "", "", "PROPERTIES(\"strip_outer_array\" = \"true\", \"fuzzy_parse\" = \"true\")")) + } + + for (String table : basicTables) { + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data_by_line.json", + "${table}", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", + "", "", "", "", "PROPERTIES(\"read_json_by_line\" = \"true\")")) + } + + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data_by_line.json", + "agg_tbl_basic", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", + "", "", "SET (k19=to_bitmap(k04),k20=HLL_HASH(k04),k21=TO_QUANTILE_STATE(k04,1.0),kd19=to_bitmap(k05),kd20=HLL_HASH(k05),kd21=TO_QUANTILE_STATE(k05,1.0))", "", "PROPERTIES(\"read_json_by_line\" = \"true\")")) + + for (String table : arrayTables) { + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_array_data_by_line.json", + "${table}", "", "", "", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17)", + "", "", "", "", "PROPERTIES(\"read_json_by_line\" = \"true\")")) + } + attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.parq", "agg_tbl_basic", "", "", "FORMAT AS 
\"PARQUET\"", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", "", "", "SET (k19=to_bitmap(k04),k20=HLL_HASH(k04),k21=TO_QUANTILE_STATE(k04,1.0),kd19=to_bitmap(k05),kd20=HLL_HASH(k05),kd21=TO_QUANTILE_STATE(k05,1.0))", "", "")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org