This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 44c4a45f72 [fix](array-type) fix the wrong data when using stream load 
to import '\N' (#12102)
44c4a45f72 is described below

commit 44c4a45f72f0be84ce49e9b92954daeabb65dc6c
Author: carlvinhust2012 <huchengha...@126.com>
AuthorDate: Mon Aug 29 09:53:37 2022 +0800

    [fix](array-type) fix the wrong data when using stream load to import '\N' 
(#12102)
    
    Co-authored-by: hucheng01 <huchen...@baidu.com>
---
 be/src/exprs/cast_functions.cpp                    |  3 +
 be/src/vec/functions/function_cast.h               |  5 ++
 .../data/load_p0/broker_load/simple_array.data     |  3 +
 .../data/load_p0/broker_load/test_array_load.out   |  6 ++
 .../load_p0/broker_load/test_array_load.groovy     | 69 ++++++++++++++++++++--
 5 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/cast_functions.cpp b/be/src/exprs/cast_functions.cpp
index 9885d9941e..d2d887ac61 100644
--- a/be/src/exprs/cast_functions.cpp
+++ b/be/src/exprs/cast_functions.cpp
@@ -810,6 +810,9 @@ DateTimeV2Val 
CastFunctions::cast_to_datetimev2_val(FunctionContext* ctx, const
 }
 
 CollectionVal CastFunctions::cast_to_array_val(FunctionContext* context, const 
StringVal& val) {
+    if (val.is_null) {
+        return CollectionVal::null();
+    }
     CollectionVal array_val;
     Status status = ArrayParser::parse(array_val, context, val);
     return status.ok() ? array_val : CollectionVal::null();
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index 11d8d7ed44..0ed17714b4 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -325,6 +325,11 @@ struct ConvertImplGenericFromString {
 
             for (size_t i = 0; i < size; ++i) {
                 const auto& val = col_from_string->get_data_at(i);
+                // Note: here we should handle the null element
+                if (val.size == 0) {
+                    col_to->insert_default();
+                    continue;
+                }
                 ReadBuffer read_buffer((char*)(val.data), val.size);
                 RETURN_IF_ERROR(data_type_to->from_string(read_buffer, 
col_to));
             }
diff --git a/regression-test/data/load_p0/broker_load/simple_array.data 
b/regression-test/data/load_p0/broker_load/simple_array.data
new file mode 100644
index 0000000000..7501722c69
--- /dev/null
+++ b/regression-test/data/load_p0/broker_load/simple_array.data
@@ -0,0 +1,3 @@
+1/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/["1991-01-01"]/["1991-01-01
 00:00:00"]/[0.33,0.67]/[3.1415926,0.878787878]/[1,1.2,1.3]
+2/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/\N/\N/\N/\N/[1,\N,1.3]
+3/\N/\N/\N/\N/\N/\N/\N/\N/\N/\N
\ No newline at end of file
diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out 
b/regression-test/data/load_p0/broker_load/test_array_load.out
index da94539584..0dfbd74d4f 100644
--- a/regression-test/data/load_p0/broker_load/test_array_load.out
+++ b/regression-test/data/load_p0/broker_load/test_array_load.out
@@ -31,6 +31,12 @@
 5      [NULL, NULL]    [32767, 32768, NULL]    [65534, NULL, 65536]    ['a', 
'b', 'c', 'd', 'e']       ['hello', 'world']      [1991-01-01]    [1991-01-01 
00:00:00]   [0.33, 0.67]    [3.1415926, 0.878787878]        [1, 1.2, 1.3]
 100    [1, 2, 3]       [32767, 32768, 32769]   [65534, 65535, 65536]   ['a', 
'b', 'c'] ['hello', 'world']      [2022-07-13]    [2022-07-13 12:30:00]   
[0.33, 0.67]    [3.1415926, 0.878787878]        [4, 5.5, 6.67]
 
+-- !select --
+1      [1, 2, 3, 4, 5] [32767, 32768, 32769]   [65534, 65535, 65536]   ['a', 
'b', 'c', 'd', 'e']       ['hello', 'world']      [1991-01-01]    [1991-01-01 
00:00:00]   [0.33, 0.67]    [3.1415926, 0.878787878]        [1, 1.2, 1.3]
+2      [1, 2, 3, 4, 5] [32767, 32768, 32769]   [65534, 65535, 65536]   ['a', 
'b', 'c', 'd', 'e']       ['hello', 'world']      \N      \N      \N      \N    
  [1, NULL, 1.3]
+3      \N      \N      \N      \N      \N      \N      \N      \N      \N      
\N
+100    [1, 2, 3]       [32767, 32768, 32769]   [65534, 65535, 65536]   ['a', 
'b', 'c'] ['hello', 'world']      [2022-07-13]    [2022-07-13 12:30:00]   
[0.33, 0.67]    [3.1415926, 0.878787878]        [4, 5.5, 6.67]
+
 -- !select --
 1      [1, 2, 3, 4, 5] [32767, 32768, 32769]   [65534, 65535, 65536]   ['a', 
'b', 'c', 'd', 'e']       ['hello', 'world']      [1991-01-01]    [1991-01-01 
00:00:00]   [0.33, 0.67]    [3.1415926, 0.878787878]        [1, 1.2, 1.3]
 2      [6, 7, 8, 9, 10]        [32767, 32768, 32769]   [65534, 65535, 65536]   
['a', 'b', 'c', 'd', 'e']       ['hello', 'world']      [1991-01-01]    
[1991-01-01 00:00:00]   [0.33, 0.67]    [3.1415926, 0.878787878]        [1, 
1.2, 1.3]
diff --git a/regression-test/suites/load_p0/broker_load/test_array_load.groovy 
b/regression-test/suites/load_p0/broker_load/test_array_load.groovy
index d585515c98..048e133522 100644
--- a/regression-test/suites/load_p0/broker_load/test_array_load.groovy
+++ b/regression-test/suites/load_p0/broker_load/test_array_load.groovy
@@ -18,6 +18,7 @@
 suite("test_array_load", "p0") {
     // define a sql table
     def testTable = "tbl_test_array_load"
+    def testTable01 = "tbl_test_array_load01"
     
     def create_test_table = {testTablex, enable_vectorized_flag ->
         // multi-line sql
@@ -64,11 +65,52 @@ suite("test_array_load", "p0") {
         assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
     }
 
-    def load_array_data = {strip_flag, read_flag, format_flag, exprs, 
json_paths, 
+    def create_test_table01 = {testTablex ->
+        // multi-line sql
+        sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')"
+
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable01} (
+              `k1` INT(11) NULL COMMENT "",
+              `k2` ARRAY<SMALLINT> NULL COMMENT "",
+              `k3` ARRAY<INT(11)> NULL COMMENT "",
+              `k4` ARRAY<BIGINT> NULL COMMENT "",
+              `k5` ARRAY<CHAR> NULL COMMENT "",
+              `k6` ARRAY<VARCHAR(20)> NULL COMMENT "",
+              `k7` ARRAY<DATE> NULL COMMENT "", 
+              `k8` ARRAY<DATETIME> NULL COMMENT "",
+              `k9` ARRAY<FLOAT> NULL COMMENT "",
+              `k10` ARRAY<DOUBLE> NULL COMMENT "",
+              `k11` ARRAY<DECIMAL(20, 6)> NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+            """
+        
+        // DDL/DML return 1 row and 3 column, the only value is update row 
count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+        
+        // insert 1 row to check whether the table is ok
+        def result2 = sql """ INSERT INTO ${testTable01} VALUES
+                        (100, [1, 2, 3], [32767, 32768, 32769], [65534, 65535, 
65536], ['a', 'b', 'c'], ["hello", "world"], 
+                        ['2022-07-13'], ['2022-07-13 12:30:00'], [0.33, 0.67], 
[3.1415926, 0.878787878], [4, 5.5, 6.67])
+                        """
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+    }
+
+    def load_array_data = {table_name, strip_flag, read_flag, format_flag, 
exprs, json_paths, 
                             json_root, where_expr, fuzzy_flag, column_sep, 
file_name ->
         // load the json data
         streamLoad {
-            table "tbl_test_array_load"
+            table table_name
             
             // set http request header params
             set 'strip_outer_array', strip_flag
@@ -160,7 +202,7 @@ suite("test_array_load", "p0") {
         
         create_test_table.call(testTable, true)
 
-        load_array_data.call('true', '', 'json', '', '', '', '', '', '', 
'simple_array.json')
+        load_array_data.call(testTable, 'true', '', 'json', '', '', '', '', 
'', '', 'simple_array.json')
         
         // select the table and check whether the data is correct
         qt_select "select * from ${testTable} order by k1"
@@ -175,7 +217,7 @@ suite("test_array_load", "p0") {
         
         create_test_table.call(testTable, false)
 
-        load_array_data.call('true', '', 'json', '', '', '', '', '', '', 
'simple_array.json')
+        load_array_data.call(testTable, 'true', '', 'json', '', '', '', '', 
'', '', 'simple_array.json')
         
         // select the table and check whether the data is correct
         qt_select "select * from ${testTable} order by k1"
@@ -190,7 +232,7 @@ suite("test_array_load", "p0") {
         
         create_test_table.call(testTable, true)
 
-        load_array_data.call('true', '', 'csv', '', '', '', '', '', '/', 
'simple_array.csv')
+        load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '', 
'/', 'simple_array.csv')
         
         // select the table and check whether the data is correct
         qt_select "select * from ${testTable} order by k1"
@@ -205,7 +247,7 @@ suite("test_array_load", "p0") {
         
         create_test_table.call(testTable, false)
 
-        load_array_data.call('true', '', 'csv', '', '', '', '', '', '/', 
'simple_array.csv')
+        load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '', 
'/', 'simple_array.csv')
         
         // select the table and check whether the data is correct
         qt_select "select * from ${testTable} order by k1"
@@ -214,6 +256,21 @@ suite("test_array_load", "p0") {
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
 
+    // case5: import array data not specify the format
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable01}"
+        
+        create_test_table01.call(testTable01)
+
+        load_array_data.call(testTable01, '', '', '', '', '', '', '', '', '/', 
'simple_array.data')
+        
+        // select the table and check whether the data is correct
+        qt_select "select * from ${testTable01} order by k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable01}")
+    }
+
     // if 'enableHdfs' in regression-conf.groovy has been set to true,
     // the test will run these case as below.
     if (enableHdfs()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to