This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 15f162f69054ff62464f3cf1710ea6114fdbf438
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Jun 28 18:23:35 2024 +0800

    [Fix](variant) ignore serialization of nothing type (#36997)
    
    1. Fix: variant must not serialize subcolumns whose type is Nothing
    2. Fix unstable regression test cases
---
 be/src/vec/data_types/data_type_object.cpp           | 14 +++++++++++---
 regression-test/data/variant_p0/load.out             | 12 ++++++------
 .../data/variant_p0/test_sub_path_pruning.out        | 10 +---------
 regression-test/suites/variant_p0/load.groovy        |  8 ++++----
 .../suites/variant_p0/test_sub_path_pruning.groovy   | 20 +++++++++++---------
 5 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/be/src/vec/data_types/data_type_object.cpp 
b/be/src/vec/data_types/data_type_object.cpp
index 7adc4c17f56..7a75583cd7b 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -63,6 +63,9 @@ int64_t 
DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
     size += sizeof(uint32_t);
     for (const auto& entry : subcolumns) {
         auto type = entry->data.get_least_common_type();
+        if (is_nothing(type)) {
+            continue;
+        }
 
         PColumnMeta column_meta_pb;
         column_meta_pb.set_name(entry->path.get_path());
@@ -91,15 +94,18 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
 
     const auto& subcolumns = column_object.get_subcolumns();
 
-    // 1. serialize num of subcolumns
-    *reinterpret_cast<uint32_t*>(buf) = subcolumns.size();
+    char* size_pos = buf;
     buf += sizeof(uint32_t);
 
+    size_t num_of_columns = 0;
     // 2. serialize each subcolumn in a loop
     for (const auto& entry : subcolumns) {
         // 2.1 serialize subcolumn column meta pb (path and type)
         auto type = entry->data.get_least_common_type();
-
+        if (is_nothing(type)) {
+            continue;
+        }
+        ++num_of_columns;
         PColumnMeta column_meta_pb;
         column_meta_pb.set_name(entry->path.get_path());
         type->to_pb_column_meta(&column_meta_pb);
@@ -113,6 +119,8 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
         // 2.2 serialize subcolumn
         buf = type->serialize(entry->data.get_finalized_column(), buf, 
be_exec_version);
     }
+    // serialize num of subcolumns
+    *reinterpret_cast<uint32_t*>(size_pos) = num_of_columns;
 
     return buf;
 }
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index ab83a86cfb3..954faffccb0 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -79,11 +79,11 @@
 -- !sql --
 {"c":"123"}
 {"c":123}
-{"cc":[123.0]}
+{"cc":[123.2]}
 {"cc":[123.1]}
 {"ccc":123}
 {"ccc":123321}
-{"cccc":123.0}
+{"cccc":123.22}
 {"cccc":123.11}
 {"ccccc":[123]}
 {"ccccc":[123456789]}
@@ -123,7 +123,7 @@
 1.10111        1800
 1.1111 17211
 \N     123456
-123    191191
+123.22 191191
 \N     123456789101112
 
 -- !sql_7 --
@@ -155,7 +155,7 @@
 123    {"A":123}
 123456 {"A":123456}
 123456789101112        {"A":123456789101112}
-191191 {"A":191191,"a":123.0,"c":123}
+191191 {"A":191191,"a":123.22,"c":123}
 1800   {"A":1800,"a":1.10111,"c":[12345]}
 17211  {"A":17211,"a":1.1111,"c":111111}
 
@@ -175,7 +175,7 @@
 \N     123456789101112 {"A":123456789101112}   \N
 \N     \N      {"AA":[123456]} \N
 \N     \N      {"AA":[123456789101112]}        \N
-123    191191  {"A":191191,"a":123.0,"c":123}  \N
+123.22 191191  {"A":191191,"a":123.22,"c":123} \N
 123    \N      {"a":"123","c":123456}  \N
 1.10111        1800    {"A":1800,"a":1.10111,"c":[12345]}      \N
 1.1111 17211   {"A":17211,"a":1.1111,"c":111111}       \N
@@ -212,7 +212,7 @@
 [123]
 
 -- !sql_25 --
-50000  54999.99999999862       6150000
+50000  54999.999999999935      6150000
 
 -- !sql_26 --
 5000
diff --git a/regression-test/data/variant_p0/test_sub_path_pruning.out 
b/regression-test/data/variant_p0/test_sub_path_pruning.out
index a48bc550d00..16328739167 100644
--- a/regression-test/data/variant_p0/test_sub_path_pruning.out
+++ b/regression-test/data/variant_p0/test_sub_path_pruning.out
@@ -233,11 +233,7 @@
 {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}
 
 -- !sql --
-
-{"c":{"d":{"e":11}}}
-
--- !sql --
-""
+1
 {"c":{"d":{"e":11}}}
 
 -- !sql --
@@ -252,10 +248,6 @@
 \N
 
 
--- !sql --
-""
-{"e":11}
-
 -- !sql --
 1      1
 2      1
diff --git a/regression-test/suites/variant_p0/load.groovy 
b/regression-test/suites/variant_p0/load.groovy
index 4103a2b34fa..cbd6bc1178c 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -93,7 +93,7 @@ suite("regression_test_variant", "nonConcurrent"){
             sql """insert into ${table_name} values (8,  '8.11111'),(1,  '{"a" 
: 1, "b" : {"c" : [{"a" : 1}]}}');"""
             sql """insert into ${table_name} values (9,  '"9999"'),(1,  '{"a" 
: 1, "b" : {"c" : [{"a" : 1}]}}');"""
             sql """insert into ${table_name} values (10,  '1000000'),(1,  
'{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
-            sql """insert into ${table_name} values (11,  '[123.0]'),(1999,  
'{"a" : 1, "b" : {"c" : 1}}'),(19921,  '{"a" : 1, "b" : 10}');"""
+            sql """insert into ${table_name} values (11,  '[123.1]'),(1999,  
'{"a" : 1, "b" : {"c" : 1}}'),(19921,  '{"a" : 1, "b" : 10}');"""
             sql """insert into ${table_name} values (12,  '[123.2]'),(1022,  
'{"a" : 1, "b" : 10}'),(1029,  '{"a" : 1, "b" : {"c" : 1}}');"""
             qt_sql1 "select k, cast(v['a'] as array<int>) from  ${table_name} 
where  size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) 
asc"
             qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from  
${table_name} where  length(cast(v['b'] as string)) > 4 order  by k, cast(v as 
string), cast(v['b'] as string) "
@@ -110,11 +110,11 @@ suite("regression_test_variant", "nonConcurrent"){
         create_table table_name
         sql """insert into ${table_name} values (1, '{"c" : "123"}');"""
         sql """insert into ${table_name} values (2, '{"c" : 123}');"""
-        sql """insert into ${table_name} values (3, '{"cc" : [123.0]}');"""
+        sql """insert into ${table_name} values (3, '{"cc" : [123.2]}');"""
         sql """insert into ${table_name} values (4, '{"cc" : [123.1]}');"""
         sql """insert into ${table_name} values (5, '{"ccc" : 123}');"""
         sql """insert into ${table_name} values (6, '{"ccc" : 123321}');"""
-        sql """insert into ${table_name} values (7, '{"cccc" : 123.0}');"""
+        sql """insert into ${table_name} values (7, '{"cccc" : 123.22}');"""
         sql """insert into ${table_name} values (8, '{"cccc" : 123.11}');"""
         sql """insert into ${table_name} values (9, '{"ccccc" : [123]}');"""
         sql """insert into ${table_name} values (10, '{"ccccc" : 
[123456789]}');"""
@@ -139,7 +139,7 @@ suite("regression_test_variant", "nonConcurrent"){
         qt_sql_4 "select cast(v['A'] as string), v['AA'], v from ${table_name} 
order by k"
         qt_sql_5 "select v['A'], v['AA'], v, v from ${table_name} where 
cast(v['A'] as bigint) > 123 order by k"
 
-        sql """insert into ${table_name} values (16,  '{"a" : 123.0, "A" : 
191191, "c": 123}');"""
+        sql """insert into ${table_name} values (16,  '{"a" : 123.22, "A" : 
191191, "c": 123}');"""
         sql """insert into ${table_name} values (18,  '{"a" : "123", "c" : 
123456}');"""
         sql """insert into ${table_name} values (20,  '{"a" : 1.10111, "A" : 
1800, "c" : [12345]}');"""
         // sql """insert into ${table_name} values (12,  '{"a" : [123]}, "c": 
"123456"');"""
diff --git a/regression-test/suites/variant_p0/test_sub_path_pruning.groovy 
b/regression-test/suites/variant_p0/test_sub_path_pruning.groovy
index a6bce99d5e4..bd819934910 100644
--- a/regression-test/suites/variant_p0/test_sub_path_pruning.groovy
+++ b/regression-test/suites/variant_p0/test_sub_path_pruning.groovy
@@ -138,7 +138,9 @@ suite("variant_sub_path_pruning", "variant_type"){
     // distinct could not push down, only push down union all
 
     // two children
-    order_qt_sql """select dt['a'] as c1 from pruning_test union all select 
dt['a'] as c1 from pruning_test;"""
+    order_qt_sql """
+        select  
/*+SET_VAR(batch_size=50,disable_streaming_preaggregations=false,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=6,parallel_pipeline_task_num=2,profile_level=1,enable_pipeline_engine=true,enable_parallel_scan=false,parallel_scan_max_scanners_count=16,parallel_scan_min_rows_per_scanner=128,enable_fold_constant_by_be=false,enable_rewrite_element_at_to_slot=true,runtime_filter_type=2,enable_nereids_planner=true,rewrite_or_to_in_predicate_thresh
 [...]
+        """
     order_qt_sql """select c1['a'] from (select dt as c1 from pruning_test 
union all select dt as c1 from pruning_test) v1;"""
     order_qt_sql """select c1['b'] from (select dt['a'] as c1 from 
pruning_test union all select dt['a'] as c1 from pruning_test) v1;"""
     order_qt_sql """select c1['b'] from (select dt['a'] as c1 from 
pruning_test union all select dt as c1 from pruning_test) v1;"""
@@ -159,19 +161,19 @@ suite("variant_sub_path_pruning", "variant_type"){
     order_qt_sql """select c1['c']['d'] from (select dt['a']['b'] as c1 from 
pruning_test union all select dt['a'] as c1 from pruning_test union all select 
dt as c1 from pruning_test) v1;"""
 
     // one table + one const list
-    order_qt_sql """select id, c1['a'] from (select cast('{"a":1}' as variant) 
as c1, 1 as id union all select dt as c1, id from pruning_test) tmp order by id 
limit 100;"""
+    order_qt_sql """select id, cast(c1['a'] as text) from (select 
cast('{"a":1}' as variant) as c1, 1 as id union all select dt as c1, id from 
pruning_test) tmp order by id limit 100;"""
     order_qt_sql """select c1['a'] from (select id, c1 from (select 
cast('{"a":1}' as variant) as c1, 1 as id union all select dt as c1, id from 
pruning_test) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select c2['b'] from (select id, c1['a'] as c2 from (select 
cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all select dt as c1, id 
from pruning_test) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select c2['a']['b'] from (select id, c1 as c2 from (select 
cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all select dt as c1, id 
from pruning_test) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select id, c1['c'] from (select cast('{"c":1}' as variant) 
as c1, 1 as id union all select dt['a']['b'] as c1, id from pruning_test) tmp 
order by id limit 100;"""
+    order_qt_sql """select c2['b'] from (select id, cast(c1['a'] as text) as 
c2 from (select cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all 
select dt as c1, id from pruning_test) tmp order by id limit 100) tmp;"""
+    // order_qt_sql """select c2['a']['b'] from (select id, c1 as c2 from 
(select cast('1' as variant) as c1, 1 as id union all select dt as c1, id from 
pruning_test) tmp order by id limit 100) tmp;"""
+    order_qt_sql """select id, cast(c1['c'] as text) from (select 
cast('{"c":1}' as variant) as c1, 1 as id union all select dt['a']['b'] as c1, 
id from pruning_test) tmp order by 1, 2 limit 100;"""
     order_qt_sql """select c1['c'] from (select id, c1 from (select 
cast('{"c":1}' as variant) as c1, 1 as id union all select dt['a']['b'] as c1, 
id from pruning_test) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select c2['d'] from (select id, c1['a'] as c2 from (select 
cast('{"c":{"d":1}}' as variant) as c1, 1 as id union all select dt['a']['b'] 
as c1, id from pruning_test) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select c2['c']['d'] from (select id, c1 as c2 from (select 
cast('{"c":{"d":1}}' as variant) as c1, 1 as id union all select dt['a']['b'] 
as c1, id from pruning_test) tmp order by id limit 100) tmp;"""
+    order_qt_sql """select  cast(c2['d'] as text)  from (select id, c1['a'] as 
c2 from (select cast('{"c":{"d":1}}' as variant) as c1, 1 as id union all 
select dt['a']['b'] as c1, id from pruning_test) tmp order by id limit 100) 
tmp;"""
+    // order_qt_sql """select c2['c']['d'] from (select id, c1 as c2 from 
(select cast('{"c":{"d":1}}' as variant) as c1, 1 as id union all select 
dt['a']['b'] as c1, id from pruning_test) tmp order by id limit 100) tmp;"""
 
     // two const list
-    order_qt_sql """select id, c1['a'] from (select cast('{"a":1}' as variant) 
as c1, 1 as id union all select cast('{"a":1}' as variant) as c1, 2 as id) tmp 
order by id limit 100;"""
+    order_qt_sql """select id, cast(c1['a'] as text) from (select 
cast('{"a":1}' as variant) as c1, 1 as id union all select cast('{"a":1}' as 
variant) as c1, 2 as id) tmp order by id limit 100;"""
     order_qt_sql """select c1['a'] from (select id, c1 from (select 
cast('{"a":1}' as variant) as c1, 1 as id union all select cast('{"a":1}' as 
variant) as c1, 2 as id) tmp order by id limit 100) tmp;"""
-    order_qt_sql """select c2['b'] from (select id, c1['a'] as c2 from (select 
cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all select 
cast('{"a":{"b":1}}' as variant) as c1, 2 as id) tmp order by id limit 100) 
tmp;"""
+    order_qt_sql """select cast(c2['b'] as text) from (select id, c1['a'] as 
c2 from (select cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all 
select cast('{"a":{"b":1}}' as variant) as c1, 2 as id) tmp order by id limit 
100) tmp;"""
     order_qt_sql """select c2['a']['b'] from (select id, c1 as c2 from (select 
cast('{"a":{"b":1}}' as variant) as c1, 1 as id union all select 
cast('{"a":{"b":1}}' as variant) as c1, 2 as id) tmp order by id limit 100) 
tmp;"""
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to