This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new 5bc15c779d3 [refactor](predefine) get type info by visitor and fix compaction (#49867) 5bc15c779d3 is described below commit 5bc15c779d323d914ae5c24c924bc51dde4033ef Author: lihangyu <lihan...@selectdb.com> AuthorDate: Tue Apr 8 19:11:50 2025 +0800 [refactor](predefine) get type info by visitor and fix compaction (#49867) --- be/src/olap/rowset/segment_v2/column_reader.cpp | 1 - be/src/vec/columns/column_object.cpp | 79 ++++++++++++--------- be/src/vec/common/field_visitors.h | 7 ++ be/src/vec/common/schema_util.cpp | 3 +- be/src/vec/data_types/convert_field_to_type.cpp | 10 +++ regression-test/data/variant_p0/desc.out | Bin 5397 -> 5529 bytes regression-test/data/variant_p0/predefine/load.out | Bin 6803 -> 7262 bytes .../schema_change/test_alter_add_drop_column.out | Bin 0 -> 187 bytes regression-test/suites/variant_p0/desc.groovy | 10 +-- .../suites/variant_p0/predefine/load.groovy | 27 +++++++ .../test_alter_add_drop_column.groovy | 26 ++++--- 11 files changed, 114 insertions(+), 49 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index a38871155ce..388200b88e2 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1888,7 +1888,6 @@ Status DefaultValueColumnIterator::init(const ColumnIteratorOptions& opts) { // "NULL" is a special default value which means the default value is null. if (_has_default_value) { if (_default_value == "NULL") { - DCHECK(_is_nullable); _is_default_value_null = true; } else { _type_size = _type_info->size(); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 00830d13a94..18ff9033cf9 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -151,6 +151,7 @@ public: } return 1 + dimensions; } + size_t operator()(const VariantField& x) { return apply_visitor(*this, x.get_field()); } template <typename T> size_t operator()(const T&) const { return 0; @@ -203,17 +204,32 @@ public: type = TypeIndex::VARIANT; return 1; } + size_t operator()(const VariantField& x) { + typed_field_info = + FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; + return 1; + } template <typename T> size_t operator()(const T&) { type = TypeId<NearestFieldType<T>>::value; return 1; } - void get_scalar_type(TypeIndex* data_type) const { *data_type = type; } + void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) const { + if (typed_field_info.has_value()) { + *data_type = typed_field_info->scalar_type_id; + *precision = typed_field_info->precision; + *scale = typed_field_info->scale; + return; + } + *data_type = type; + } bool contain_nulls() const { return have_nulls; } bool need_convert_field() const { return false; } private: + // initialized when operator()(const VariantField& x) + std::optional<FieldInfo> typed_field_info; TypeIndex type = TypeIndex::Nothing; bool have_nulls = false; }; @@ -271,6 +287,15 @@ public: type_indexes.insert(TypeIndex::VARIANT); return 0; } + size_t operator()(const VariantField& x) { + if (x.get_type_id() == TypeIndex::Array) { + apply_visitor(*this, x.get_field()); + } else { + typed_field_info = + FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; + } + return 0; + } size_t operator()(const Null&) { have_nulls = true; return 0; @@ -282,7 +307,14 @@ public: type_indexes.insert(TypeId<NearestFieldType<T>>::value); return 0; } - void get_scalar_type(TypeIndex* type) const { + void get_scalar_type(TypeIndex* type, int* precision, int* scale) const { + if (typed_field_info.has_value()) { + // fast path + *type = typed_field_info->scalar_type_id; + *precision = typed_field_info->precision; + *scale = typed_field_info->scale; + return; + } DataTypePtr data_type; get_least_supertype_jsonb(type_indexes, &data_type); *type = data_type->get_type_id(); @@ -291,6 +323,8 @@ public: bool need_convert_field() const { return field_types.size() > 1; } private: + // initialized when operator()(const VariantField& x) + std::optional<FieldInfo> typed_field_info; phmap::flat_hash_set<TypeIndex> type_indexes; phmap::flat_hash_set<FieldType> field_types; bool have_nulls = false; @@ -334,49 +368,28 @@ void get_field_info_impl(const Field& field, FieldInfo* info) { Visitor to_scalar_type_visitor; apply_visitor(to_scalar_type_visitor, field); TypeIndex type_id; - to_scalar_type_visitor.get_scalar_type(&type_id); + int precision = 0; + int scale = 0; + to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale); // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]] *info = { type_id, to_scalar_type_visitor.contain_nulls(), to_scalar_type_visitor.need_convert_field(), apply_visitor(FieldVisitorToNumberOfDimensions(), field), + scale, + precision, }; } -void get_base_field_info(const Field& field, FieldInfo* info) { - const auto& variant_field = field.get<const VariantField&>(); - const auto& wrapped_field = variant_field.get_field(); - if (variant_field.get_type_id() == TypeIndex::Array) { - if (wrapped_field.safe_get<Array>().empty()) { - info->scalar_type_id = TypeIndex::Nothing; - ++info->num_dimensions; - info->have_nulls = true; - info->need_convert = false; - } else { - ++info->num_dimensions; - get_base_field_info(wrapped_field.safe_get<Array>()[0], info); - } - return; - } - - // handle scalar types - info->scalar_type_id = variant_field.get_type_id(); - info->have_nulls = true; - info->need_convert = false; - info->scale = variant_field.get_scale(); - info->precision = variant_field.get_precision(); +bool is_complex_field(const Field& field) { + return field.is_complex_field() || + (field.is_variant_field() && + field.get<const VariantField&>().get_field().is_complex_field()); } void get_field_info(const Field& field, FieldInfo* info) { - if (field.is_variant_field()) { - // Currently we support specify predefined schema for other types include decimal, datetime ...etc - // so we should set specified info to create correct types, and those predefined types are static and - // type no need to deduce - get_base_field_info(field, info); - return; - } - if (field.is_complex_field()) { + if (is_complex_field(field)) { get_field_info_impl<FieldVisitorToScalarType>(field, info); } else { get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info); diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h index eefa424c641..e5468867b07 100644 --- a/be/src/vec/common/field_visitors.h +++ b/be/src/vec/common/field_visitors.h @@ -25,6 +25,7 @@ #include "vec/common/demangle.h" #include "vec/core/accurate_comparison.h" #include "vec/core/field.h" +#include "vec/core/types.h" namespace doris::vectorized { @@ -71,6 +72,12 @@ typename std::decay_t<Visitor>::ResultType apply_visitor(Visitor&& visitor, F&& return visitor(field.template get<DecimalField<Decimal256>>()); case Field::Types::JSONB: return visitor(field.template get<JsonbField>()); + case Field::Types::Variant: + return visitor(field.template get<VariantField>()); + case Field::Types::IPv6: + return visitor(field.template get<IPv6>()); + case Field::Types::Int256: + return visitor(field.template get<Int128>()); default: throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Bad type of Field {}", static_cast<int>(field.get_type())); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index feadabc46da..17430380310 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -643,7 +643,8 @@ TabletColumn create_sparse_column(const TabletColumn& variant) { res.set_aggregation_method(variant.aggregation()); res.set_path_info(PathInData {variant.name_lower_case() + "." + SPARSE_COLUMN_PATH}); res.set_parent_unique_id(variant.unique_id()); - + // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults + res.set_default_value("NULL"); TabletColumn child_tcolumn; child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING); res.add_sub_column(child_tcolumn); diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index 55613e21921..2646d0e831d 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -44,6 +44,8 @@ #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_nullable.h" +#include "vec/io/io_helper.h" +#include "vec/runtime/ipv6_value.h" namespace doris::vectorized { #include "common/compile_check_begin.h" @@ -61,7 +63,12 @@ class FieldVisitorToStringSimple : public StaticVisitor<String> { public: String operator()(const Null& x) const { return "NULL"; } String operator()(const UInt64& x) const { return std::to_string(x); } + String operator()(const IPv6& x) const { + auto value = IPv6Value(x); + return value.to_string(); + } String operator()(const Int64& x) const { return std::to_string(x); } + String operator()(const Int128& x) const { return int128_to_string(x); } String operator()(const Float64& x) const { return std::to_string(x); } String operator()(const String& x) const { return x; } [[noreturn]] String operator()(const UInt128& x) const { @@ -73,6 +80,9 @@ public: [[noreturn]] String operator()(const Tuple& x) const { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); } + [[noreturn]] String operator()(const VariantField& x) const { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); + } [[noreturn]] String operator()(const DecimalField<Decimal32>& x) const { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); } diff --git a/regression-test/data/variant_p0/desc.out b/regression-test/data/variant_p0/desc.out index ffd87b9d05b..b6afa3f2101 100644 Binary files a/regression-test/data/variant_p0/desc.out and b/regression-test/data/variant_p0/desc.out differ diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out index 06e06e16c58..ca4161c3a01 100644 Binary files a/regression-test/data/variant_p0/predefine/load.out and b/regression-test/data/variant_p0/predefine/load.out differ diff --git a/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out b/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out new file mode 100644 index 00000000000..33fb39ee1d0 Binary files /dev/null and b/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out differ diff --git a/regression-test/suites/variant_p0/desc.groovy b/regression-test/suites/variant_p0/desc.groovy index 90ca4595fec..57ec6f360f3 100644 --- a/regression-test/suites/variant_p0/desc.groovy +++ b/regression-test/suites/variant_p0/desc.groovy @@ -57,7 +57,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS ${buckets} - properties("replication_num" = "1", "disable_auto_compaction" = "false"); + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0"); """ } @@ -76,7 +76,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){ PARTITION p3 VALUES LESS THAN (100000) ) DISTRIBUTED BY HASH(k) BUCKETS ${buckets} - properties("replication_num" = "1", "disable_auto_compaction" = "false"); + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0"); """ } @@ -185,7 +185,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 5 - properties("replication_num" = "1", "disable_auto_compaction" = "false"); + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0"); """ sql """ insert into ${table_name} values (0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}')""" sql "select * from ${table_name} limit 1" @@ -228,7 +228,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 5 - properties("replication_num" = "1", "disable_auto_compaction" = "false"); + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0"); """ sql """ insert into ${table_name} values (0, '{"名字" : "jack", "!@#^&*()": "11111", "金额" : 200, "画像" : {"地址" : "北京", "\\\u4E2C\\\u6587": "unicode"}}')""" sql """set describe_extend_variant_column = true""" @@ -244,7 +244,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 5 - properties("replication_num" = "1", "disable_auto_compaction" = "false"); + properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0"); """ sql """ insert into ${table_name} values (0, '{}')""" sql """ insert into ${table_name} values (0, '100')""" diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy index 1093ee3a25f..7ccb8068024 100644 --- a/regression-test/suites/variant_p0/predefine/load.groovy +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -292,4 +292,31 @@ suite("regression_test_variant_predefine_schema", "p0"){ "variant_max_subcolumns_count" = "0" ); """ + + // array with nulls + + sql "DROP TABLE IF EXISTS test_array_with_nulls" + // test bf with bool + sql """ + CREATE TABLE `test_array_with_nulls` ( + `k` bigint NULL, + `var` variant<array_decimal:array<decimalv3(27,9)>> + ) ENGINE=OLAP + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "variant_max_subcolumns_count" = "0" + ); + """ + sql """insert into test_array_with_nulls values(3, '{"array_decimal" : [null, 2.2, 3.3, 4.4]}')""" + qt_sql_arr_null_1 "select * from test_array_with_nulls order by k" + sql """insert into test_array_with_nulls values(1, '{"array_decimal" : [1.1, 2.2, 3.3, null]}')""" + sql """insert into test_array_with_nulls values(2, '{"array_decimal" : [1.1, 2.2, null, 4.4]}')""" + sql """insert into test_array_with_nulls values(4, '{"array_decimal" : [1.1, null, 3.3, 4.4]}')""" + sql """insert into test_array_with_nulls values(5, '{"array_decimal" : [1.1, 2.2, 3.3, 4.4]}')""" + sql """insert into test_array_with_nulls values(6, '{"array_decimal" : []}')""" + sql """insert into test_array_with_nulls values(7, '{"array_decimal" : [null, null]}')""" + qt_sql_arr_null_2 "select * from test_array_with_nulls order by k" } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy b/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy index ce55a62af65..df2b3c46a60 100644 --- a/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy +++ b/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy @@ -25,7 +25,7 @@ suite("regression_test_variant_add_drop_column", "variant_type"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 1 - properties("replication_num" = "1"); + properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')""" @@ -34,20 +34,28 @@ suite("regression_test_variant_add_drop_column", "variant_type"){ sql "alter table variant_add_drop_column add column t2 datetime default null" sql """insert into variant_add_drop_column values (1, '{"a" : 12345234567,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01")""" sql "alter table variant_add_drop_column add column i1 int default null" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01", 12345)""" + sql """insert into variant_add_drop_column values (2, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01", 12345)""" sql "alter table variant_add_drop_column drop column t1" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", 12345)""" + sql """insert into variant_add_drop_column values (3, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", 12345)""" sql "alter table variant_add_drop_column drop column t2" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', 12345)""" + sql """insert into variant_add_drop_column values (4, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', 12345)""" sql "alter table variant_add_drop_column drop column i1" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}')""" + sql """insert into variant_add_drop_column values (5, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}')""" sql "alter table variant_add_drop_column drop column v" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')""" + sql """insert into variant_add_drop_column values (6, '{"a" : 12345,"b" : 2}')""" sql "alter table variant_add_drop_column add column v variant default null" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')""" + sql """insert into variant_add_drop_column values (7, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')""" sql "alter table variant_add_drop_column add column v3 variant default null" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')""" + sql """insert into variant_add_drop_column values (8, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')""" sql "alter table variant_add_drop_column drop column v" sql "alter table variant_add_drop_column drop column v2" - sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')""" + sql """insert into variant_add_drop_column values (9, '{"a" : 12345,"b" : 2}')""" + + // trigger compactions for all tablets in ${tableName} + def tablets = sql_return_maparray """ show tablets from ${table_name}; """ + + // trigger compactions for all tablets in ${tableName} + trigger_and_wait_compaction(table_name, "cumulative") + + qt_sql "select * from variant_add_drop_column order by k limit 10" } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org