This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 44c4a45f72 [fix](array-type) fix the wrong data when use stream load to import '\N' (#12102) 44c4a45f72 is described below commit 44c4a45f72f0be84ce49e9b92954daeabb65dc6c Author: carlvinhust2012 <huchengha...@126.com> AuthorDate: Mon Aug 29 09:53:37 2022 +0800 [fix](array-type) fix the wrong data when use stream load to import '\N' (#12102) Co-authored-by: hucheng01 <huchen...@baidu.com> --- be/src/exprs/cast_functions.cpp | 3 + be/src/vec/functions/function_cast.h | 5 ++ .../data/load_p0/broker_load/simple_array.data | 3 + .../data/load_p0/broker_load/test_array_load.out | 6 ++ .../load_p0/broker_load/test_array_load.groovy | 69 ++++++++++++++++++++-- 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/be/src/exprs/cast_functions.cpp b/be/src/exprs/cast_functions.cpp index 9885d9941e..d2d887ac61 100644 --- a/be/src/exprs/cast_functions.cpp +++ b/be/src/exprs/cast_functions.cpp @@ -810,6 +810,9 @@ DateTimeV2Val CastFunctions::cast_to_datetimev2_val(FunctionContext* ctx, const } CollectionVal CastFunctions::cast_to_array_val(FunctionContext* context, const StringVal& val) { + if (val.is_null) { + return CollectionVal::null(); + } CollectionVal array_val; Status status = ArrayParser::parse(array_val, context, val); return status.ok() ? array_val : CollectionVal::null(); diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 11d8d7ed44..0ed17714b4 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -325,6 +325,11 @@ struct ConvertImplGenericFromString { for (size_t i = 0; i < size; ++i) { const auto& val = col_from_string->get_data_at(i); + // Note: here we should handle the null element + if (val.size == 0) { + col_to->insert_default(); + continue; + } ReadBuffer read_buffer((char*)(val.data), val.size); RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); } diff --git a/regression-test/data/load_p0/broker_load/simple_array.data b/regression-test/data/load_p0/broker_load/simple_array.data new file mode 100644 index 0000000000..7501722c69 --- /dev/null +++ b/regression-test/data/load_p0/broker_load/simple_array.data @@ -0,0 +1,3 @@ +1/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/["1991-01-01"]/["1991-01-01 00:00:00"]/[0.33,0.67]/[3.1415926,0.878787878]/[1,1.2,1.3] +2/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/\N/\N/\N/\N/[1,\N,1.3] +3/\N/\N/\N/\N/\N/\N/\N/\N/\N/\N \ No newline at end of file diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out b/regression-test/data/load_p0/broker_load/test_array_load.out index da94539584..0dfbd74d4f 100644 --- a/regression-test/data/load_p0/broker_load/test_array_load.out +++ b/regression-test/data/load_p0/broker_load/test_array_load.out @@ -31,6 +31,12 @@ 5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] 100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] \N \N \N \N [1, NULL, 1.3] +3 \N \N \N \N \N \N \N \N \N \N +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + -- !select -- 1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] 2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] diff --git a/regression-test/suites/load_p0/broker_load/test_array_load.groovy b/regression-test/suites/load_p0/broker_load/test_array_load.groovy index d585515c98..048e133522 100644 --- a/regression-test/suites/load_p0/broker_load/test_array_load.groovy +++ b/regression-test/suites/load_p0/broker_load/test_array_load.groovy @@ -18,6 +18,7 @@ suite("test_array_load", "p0") { // define a sql table def testTable = "tbl_test_array_load" + def testTable01 = "tbl_test_array_load01" def create_test_table = {testTablex, enable_vectorized_flag -> // multi-line sql @@ -64,11 +65,52 @@ suite("test_array_load", "p0") { assertTrue(result2[0][0] == 1, "Insert should update 1 rows") } - def load_array_data = {strip_flag, read_flag, format_flag, exprs, json_paths, + def create_test_table01 = {testTablex -> + // multi-line sql + sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')" + + def result1 = sql """ + CREATE TABLE IF NOT EXISTS ${testTable01} ( + `k1` INT(11) NULL COMMENT "", + `k2` ARRAY<SMALLINT> NULL COMMENT "", + `k3` ARRAY<INT(11)> NULL COMMENT "", + `k4` ARRAY<BIGINT> NULL COMMENT "", + `k5` ARRAY<CHAR> NULL COMMENT "", + `k6` ARRAY<VARCHAR(20)> NULL COMMENT "", + `k7` ARRAY<DATE> NULL COMMENT "", + `k8` ARRAY<DATETIME> NULL COMMENT "", + `k9` ARRAY<FLOAT> NULL COMMENT "", + `k10` ARRAY<DOUBLE> NULL COMMENT "", + `k11` ARRAY<DECIMAL(20, 6)> NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + + // DDL/DML return 1 row and 3 column, the only value is update row count + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + + // insert 1 row to check whether the table is ok + def result2 = sql """ INSERT INTO ${testTable01} VALUES + (100, [1, 2, 3], [32767, 32768, 32769], [65534, 65535, 65536], ['a', 'b', 'c'], ["hello", "world"], + ['2022-07-13'], ['2022-07-13 12:30:00'], [0.33, 0.67], [3.1415926, 0.878787878], [4, 5.5, 6.67]) + """ + assertTrue(result2.size() == 1) + assertTrue(result2[0].size() == 1) + assertTrue(result2[0][0] == 1, "Insert should update 1 rows") + } + + def load_array_data = {table_name, strip_flag, read_flag, format_flag, exprs, json_paths, json_root, where_expr, fuzzy_flag, column_sep, file_name -> // load the json data streamLoad { - table "tbl_test_array_load" + table table_name // set http request header params set 'strip_outer_array', strip_flag @@ -160,7 +202,7 @@ suite("test_array_load", "p0") { create_test_table.call(testTable, true) - load_array_data.call('true', '', 'json', '', '', '', '', '', '', 'simple_array.json') + load_array_data.call(testTable, 'true', '', 'json', '', '', '', '', '', '', 'simple_array.json') // select the table and check whether the data is correct qt_select "select * from ${testTable} order by k1" @@ -175,7 +217,7 @@ suite("test_array_load", "p0") { create_test_table.call(testTable, false) - load_array_data.call('true', '', 'json', '', '', '', '', '', '', 'simple_array.json') + load_array_data.call(testTable, 'true', '', 'json', '', '', '', '', '', '', 'simple_array.json') // select the table and check whether the data is correct qt_select "select * from ${testTable} order by k1" @@ -190,7 +232,7 @@ suite("test_array_load", "p0") { create_test_table.call(testTable, true) - load_array_data.call('true', '', 'csv', '', '', '', '', '', '/', 'simple_array.csv') + load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '', '/', 'simple_array.csv') // select the table and check whether the data is correct qt_select "select * from ${testTable} order by k1" @@ -205,7 +247,7 @@ suite("test_array_load", "p0") { create_test_table.call(testTable, false) - load_array_data.call('true', '', 'csv', '', '', '', '', '', '/', 'simple_array.csv') + load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '', '/', 'simple_array.csv') // select the table and check whether the data is correct qt_select "select * from ${testTable} order by k1" @@ -214,6 +256,21 @@ suite("test_array_load", "p0") { try_sql("DROP TABLE IF EXISTS ${testTable}") } + // case5: import array data not specify the format + try { + sql "DROP TABLE IF EXISTS ${testTable01}" + + create_test_table01.call(testTable01) + + load_array_data.call(testTable01, '', '', '', '', '', '', '', '', '/', 'simple_array.data') + + // select the table and check whether the data is correct + qt_select "select * from ${testTable01} order by k1" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable01}") + } + // if 'enableHdfs' in regression-conf.groovy has been set to true, // the test will run these case as below. if (enableHdfs()) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org