This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 9aaa6ba3515e7c20afd50232700af5bbe0308ac0 Author: lihangyu <15605149...@163.com> AuthorDate: Fri Jan 26 14:02:26 2024 +0800 [Fix](Variant) fix variant lost null info after `cast_column` (#30153) This could result incorrect output in hirachinal cases ``` sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" : [1, 2, 3]}')""" sql """insert into ${table_name} select -2, '{"a": 11245, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}' as json_str union all select -1, '{"a": 1123}' as json_str union all select *, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") limit 4096 ;""" mysql> select v["c"] from var_rs where k = -3 or k = -2; +----------------------+ | element_at(`v`, 'c') | +----------------------+ | [1,2,3] | | [] | +----------------------+ 2 rows in set (0.04 sec) ``` --- be/src/exprs/json_functions.cpp | 2 +- be/src/vec/common/schema_util.cpp | 23 +++++++++- be/src/vec/data_types/serde/data_type_serde.cpp | 2 +- .../data/variant_p0/variant_hirachinal.out | 21 ++++++++++ .../suites/variant_p0/variant_hirachinal.groovy | 49 ++++++++++++++++++++++ 5 files changed, 94 insertions(+), 3 deletions(-) diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index ace18f1090f..724c78922e8 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -333,7 +333,7 @@ void JsonFunctions::merge_objects(rapidjson::Value& dst_object, rapidjson::Value for (auto src_it = src_object.MemberBegin(); src_it != src_object.MemberEnd(); ++src_it) { auto dst_it = dst_object.FindMember(src_it->name); if (dst_it != dst_object.MemberEnd()) { - if (src_it->value.IsObject()) { + if (src_it->value.IsObject() && dst_it->value.IsObject()) { merge_objects(dst_it->value, src_it->value, allocator); } else { if (dst_it->value.IsNull()) { diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 0ceddf25b3f..dc6f65584f2 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -152,6 +152,23 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co Block tmp_block {arguments}; size_t result_column = tmp_block.columns(); auto ctx = FunctionContext::create_context(nullptr, {}, {}); + + // To prevent from null info lost, we should not call function since the function framework will wrap + // nullable to Variant instead of the root of Variant + // correct output: Nullable(Array(int)) -> Nullable(Variant(Nullable(Array(int)))) + // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int))) + if (WhichDataType(remove_nullable(type)).is_variant_type()) { + // set variant root column/type to from column/type + auto variant = ColumnObject::create(true /*always nullable*/); + CHECK(arg.column->is_nullable()); + variant->create_root(arg.type, arg.column->assume_mutable()); + ColumnPtr nullable = ColumnNullable::create( + variant->get_ptr(), + check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr()); + *result = type->is_nullable() ? nullable : variant->get_ptr(); + return Status::OK(); + } + // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse // each line in original string column. ctx->set_string_as_jsonb_string(true); @@ -159,6 +176,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co RETURN_IF_ERROR( function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())); *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const(); + VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name, + arg.column->get_name(), (*result)->get_name()); return Status::OK(); } @@ -488,7 +507,9 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) size_t result_column = tmp_block.columns(); tmp_block.insert({nullptr, json_type, ""}); RETURN_IF_ERROR(function->execute(nullptr, tmp_block, argnum, result_column, source->size())); - dst = tmp_block.get_by_position(result_column).column->assume_mutable(); + dst = tmp_block.get_by_position(result_column) + .column->convert_to_full_column_if_const() + ->assume_mutable(); return Status::OK(); } // --------------------------- diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp b/be/src/vec/data_types/serde/data_type_serde.cpp index 32466c47e84..62ba805b82f 100644 --- a/be/src/vec/data_types/serde/data_type_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_serde.cpp @@ -45,8 +45,8 @@ DataTypeSerDeSPtrs create_data_type_serdes(const std::vector<SlotDescriptor*>& s void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target, rapidjson::Document::AllocatorType& allocator) { + target.SetArray(); for (const vectorized::Field& item : array) { - target.SetArray(); rapidjson::Value val; convert_field_to_rapidjson(item, val, allocator); target.PushBack(val, allocator); diff --git a/regression-test/data/variant_p0/variant_hirachinal.out b/regression-test/data/variant_p0/variant_hirachinal.out new file mode 100644 index 00000000000..0f6db58969a --- /dev/null +++ b/regression-test/data/variant_p0/variant_hirachinal.out @@ -0,0 +1,21 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +-3 {"a":1,"b":1.5,"c":[1,2,3]} +-2 {"a":11245,"b":[123,{"xx":1}],"c":{"c":456,"d":"null","e":7.111}} +-1 {"a":1123} +0 {"a":1234,"xxxx":"kaana"} +1 {"a":1234,"xxxx":"kaana"} +2 {"a":1234,"xxxx":"kaana"} +3 {"a":1234,"xxxx":"kaana"} +4 {"a":1234,"xxxx":"kaana"} +5 {"a":1234,"xxxx":"kaana"} +6 {"a":1234,"xxxx":"kaana"} + +-- !sql -- +[1,2,3] +{"c":456,"d":"null","e":7.111} + +-- !sql -- +1.5 +[123,{"xx":1}] + diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy b/regression-test/suites/variant_p0/variant_hirachinal.groovy new file mode 100644 index 00000000000..5ad12cdbeea --- /dev/null +++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_hirachinal", "variant_type"){ + def set_be_config = { key, value -> + String backend_id; + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + + def table_name = "var_rs" + sql "DROP TABLE IF EXISTS ${table_name}" + + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + sql "set experimental_enable_nereids_planner = false" + sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" : [1, 2, 3]}')""" + sql """insert into ${table_name} select -2, '{"a": 11245, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}' as json_str + union all select -1, '{"a": 1123}' as json_str union all select *, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") limit 4096 ;""" + qt_sql "select * from ${table_name} order by k limit 10" + qt_sql "select v['c'] from ${table_name} where k = -3 or k = -2" + qt_sql "select v['b'] from ${table_name} where k = -3 or k = -2" +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org