This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9aaa6ba3515e7c20afd50232700af5bbe0308ac0
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Jan 26 14:02:26 2024 +0800

    [Fix](Variant) fix variant lost null info after `cast_column` (#30153)
    
    This could result incorrect output in hirachinal cases
    
    ```
     sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" : 
[1, 2, 3]}')"""
        sql """insert into  ${table_name} select -2, '{"a": 11245, "b" : [123, 
{"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}'  as json_str
                union  all select -1, '{"a": 1123}' as json_str union all 
select *, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = 
"4096") limit 4096 ;"""
    
    mysql> select v["c"] from var_rs where k = -3 or k = -2;
    +----------------------+
    | element_at(`v`, 'c') |
    +----------------------+
    | [1,2,3]              |
    | []                   |
    +----------------------+
    2 rows in set (0.04 sec)
    ```
---
 be/src/exprs/json_functions.cpp                    |  2 +-
 be/src/vec/common/schema_util.cpp                  | 23 +++++++++-
 be/src/vec/data_types/serde/data_type_serde.cpp    |  2 +-
 .../data/variant_p0/variant_hirachinal.out         | 21 ++++++++++
 .../suites/variant_p0/variant_hirachinal.groovy    | 49 ++++++++++++++++++++++
 5 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index ace18f1090f..724c78922e8 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -333,7 +333,7 @@ void JsonFunctions::merge_objects(rapidjson::Value& 
dst_object, rapidjson::Value
     for (auto src_it = src_object.MemberBegin(); src_it != 
src_object.MemberEnd(); ++src_it) {
         auto dst_it = dst_object.FindMember(src_it->name);
         if (dst_it != dst_object.MemberEnd()) {
-            if (src_it->value.IsObject()) {
+            if (src_it->value.IsObject() && dst_it->value.IsObject()) {
                 merge_objects(dst_it->value, src_it->value, allocator);
             } else {
                 if (dst_it->value.IsNull()) {
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 0ceddf25b3f..dc6f65584f2 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -152,6 +152,23 @@ Status cast_column(const ColumnWithTypeAndName& arg, const 
DataTypePtr& type, Co
     Block tmp_block {arguments};
     size_t result_column = tmp_block.columns();
     auto ctx = FunctionContext::create_context(nullptr, {}, {});
+
+    // To prevent from null info lost, we should not call function since the 
function framework will wrap
+    // nullable to Variant instead of the root of Variant
+    // correct output: Nullable(Array(int)) -> 
Nullable(Variant(Nullable(Array(int))))
+    // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
+    if (WhichDataType(remove_nullable(type)).is_variant_type()) {
+        // set variant root column/type to from column/type
+        auto variant = ColumnObject::create(true /*always nullable*/);
+        CHECK(arg.column->is_nullable());
+        variant->create_root(arg.type, arg.column->assume_mutable());
+        ColumnPtr nullable = ColumnNullable::create(
+                variant->get_ptr(),
+                
check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr());
+        *result = type->is_nullable() ? nullable : variant->get_ptr();
+        return Status::OK();
+    }
+
     // We convert column string to jsonb type just add a string jsonb field to 
dst column instead of parse
     // each line in original string column.
     ctx->set_string_as_jsonb_string(true);
@@ -159,6 +176,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const 
DataTypePtr& type, Co
     RETURN_IF_ERROR(
             function->execute(ctx.get(), tmp_block, {0}, result_column, 
arg.column->size()));
     *result = 
tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const();
+    VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", 
arg.name,
+                              arg.column->get_name(), (*result)->get_name());
     return Status::OK();
 }
 
@@ -488,7 +507,9 @@ Status extract(ColumnPtr source, const PathInData& path, 
MutableColumnPtr& dst)
     size_t result_column = tmp_block.columns();
     tmp_block.insert({nullptr, json_type, ""});
     RETURN_IF_ERROR(function->execute(nullptr, tmp_block, argnum, 
result_column, source->size()));
-    dst = tmp_block.get_by_position(result_column).column->assume_mutable();
+    dst = tmp_block.get_by_position(result_column)
+                  .column->convert_to_full_column_if_const()
+                  ->assume_mutable();
     return Status::OK();
 }
 // ---------------------------
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp 
b/be/src/vec/data_types/serde/data_type_serde.cpp
index 32466c47e84..62ba805b82f 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -45,8 +45,8 @@ DataTypeSerDeSPtrs create_data_type_serdes(const 
std::vector<SlotDescriptor*>& s
 void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array,
                                                rapidjson::Value& target,
                                                
rapidjson::Document::AllocatorType& allocator) {
+    target.SetArray();
     for (const vectorized::Field& item : array) {
-        target.SetArray();
         rapidjson::Value val;
         convert_field_to_rapidjson(item, val, allocator);
         target.PushBack(val, allocator);
diff --git a/regression-test/data/variant_p0/variant_hirachinal.out 
b/regression-test/data/variant_p0/variant_hirachinal.out
new file mode 100644
index 00000000000..0f6db58969a
--- /dev/null
+++ b/regression-test/data/variant_p0/variant_hirachinal.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+-3     {"a":1,"b":1.5,"c":[1,2,3]}
+-2     {"a":11245,"b":[123,{"xx":1}],"c":{"c":456,"d":"null","e":7.111}}
+-1     {"a":1123}
+0      {"a":1234,"xxxx":"kaana"}
+1      {"a":1234,"xxxx":"kaana"}
+2      {"a":1234,"xxxx":"kaana"}
+3      {"a":1234,"xxxx":"kaana"}
+4      {"a":1234,"xxxx":"kaana"}
+5      {"a":1234,"xxxx":"kaana"}
+6      {"a":1234,"xxxx":"kaana"}
+
+-- !sql --
+[1,2,3]
+{"c":456,"d":"null","e":7.111}
+
+-- !sql --
+1.5
+[123,{"xx":1}]
+
diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy 
b/regression-test/suites/variant_p0/variant_hirachinal.groovy
new file mode 100644
index 00000000000..5ad12cdbeea
--- /dev/null
+++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_hirachinal", "variant_type"){
+    def set_be_config = { key, value ->
+        String backend_id;
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, 
backendId_to_backendHttpPort);
+
+        backend_id = backendId_to_backendIP.keySet()[0]
+        def (code, out, err) = 
update_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), key, value)
+        logger.info("update config: code=" + code + ", out=" + out + ", err=" 
+ err)
+    }
+ 
+    def table_name = "var_rs"
+    sql "DROP TABLE IF EXISTS ${table_name}"
+
+    sql """
+            CREATE TABLE IF NOT EXISTS ${table_name} (
+                k bigint,
+                v variant
+            )
+            DUPLICATE KEY(`k`)
+            DISTRIBUTED BY HASH(k) BUCKETS 1
+            properties("replication_num" = "1", "disable_auto_compaction" = 
"false");
+        """
+    sql "set experimental_enable_nereids_planner = false"
+    sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" : 
[1, 2, 3]}')"""
+    sql """insert into  ${table_name} select -2, '{"a": 11245, "b" : [123, 
{"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}'  as json_str
+            union  all select -1, '{"a": 1123}' as json_str union all select 
*, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") 
limit 4096 ;"""
+    qt_sql "select * from ${table_name} order by k limit 10"
+    qt_sql "select v['c'] from ${table_name} where k = -3 or k = -2"
+    qt_sql "select v['b'] from ${table_name} where k = -3 or k = -2"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to