This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 5bc15c779d3 [refactor](predefine) get type info by visitor and fix compaction (#49867)
5bc15c779d3 is described below

commit 5bc15c779d323d914ae5c24c924bc51dde4033ef
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Tue Apr 8 19:11:50 2025 +0800

    [refactor](predefine) get type info by visitor and fix compaction (#49867)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    |   1 -
 be/src/vec/columns/column_object.cpp               |  79 ++++++++++++---------
 be/src/vec/common/field_visitors.h                 |   7 ++
 be/src/vec/common/schema_util.cpp                  |   3 +-
 be/src/vec/data_types/convert_field_to_type.cpp    |  10 +++
 regression-test/data/variant_p0/desc.out           | Bin 5397 -> 5529 bytes
 regression-test/data/variant_p0/predefine/load.out | Bin 6803 -> 7262 bytes
 .../schema_change/test_alter_add_drop_column.out   | Bin 0 -> 187 bytes
 regression-test/suites/variant_p0/desc.groovy      |  10 +--
 .../suites/variant_p0/predefine/load.groovy        |  27 +++++++
 .../test_alter_add_drop_column.groovy              |  26 ++++---
 11 files changed, 114 insertions(+), 49 deletions(-)
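
The gist of the refactor: the dedicated get_base_field_info() helper is removed, and type information for predefined (typed) variant fields, i.e. scalar type id, precision and scale, is now produced by the ordinary field visitors, which gain an operator() overload for VariantField. A minimal, self-contained sketch of that shape, using stand-in types (TypedField, TypeId and the FieldInfo layout here are illustrative, not the Doris definitions):

    #include <cstdint>
    #include <optional>
    #include <string>
    #include <variant>

    // Stand-ins for Doris' TypeIndex / FieldInfo; not the real definitions.
    enum class TypeId { Nothing, Int64, String };
    struct FieldInfo {
        TypeId scalar_type_id = TypeId::Nothing;
        bool have_nulls = false;
        bool need_convert = false;
        int num_dimensions = 0;
        int scale = 0;
        int precision = 0;
    };

    // A "typed" field already carries its scalar type, precision and scale,
    // much like VariantField in this patch: nothing needs to be deduced.
    struct TypedField { TypeId type; int precision; int scale; };

    using Field = std::variant<std::monostate, int64_t, std::string, TypedField>;

    struct ToScalarType {
        // Fast path: copy the predefined type info verbatim.
        void operator()(const TypedField& f) {
            typed_info = FieldInfo {f.type, true, false, 0, f.scale, f.precision};
        }
        void operator()(const int64_t&) { deduced = TypeId::Int64; }
        void operator()(const std::string&) { deduced = TypeId::String; }
        void operator()(std::monostate) { /* null: nothing to deduce */ }

        FieldInfo result() const { return typed_info ? *typed_info : FieldInfo {deduced}; }

        std::optional<FieldInfo> typed_info; // set only when a TypedField was visited
        TypeId deduced = TypeId::Nothing;
    };

    FieldInfo get_field_info_sketch(const Field& f) {
        ToScalarType v;
        std::visit(v, f);
        return v.result();
    }

The real visitors below keep their existing deduction paths (type_indexes plus get_least_supertype_jsonb) and only short-circuit when the optional typed_field_info has been set.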

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index a38871155ce..388200b88e2 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1888,7 +1888,6 @@ Status DefaultValueColumnIterator::init(const ColumnIteratorOptions& opts) {
     // "NULL" is a special default value which means the default value is null.
     if (_has_default_value) {
         if (_default_value == "NULL") {
-            DCHECK(_is_nullable);
             _is_default_value_null = true;
         } else {
             _type_size = _type_info->size();
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 00830d13a94..18ff9033cf9 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -151,6 +151,7 @@ public:
         }
         return 1 + dimensions;
     }
+    size_t operator()(const VariantField& x) { return apply_visitor(*this, x.get_field()); }
     template <typename T>
     size_t operator()(const T&) const {
         return 0;
@@ -203,17 +204,32 @@ public:
         type = TypeIndex::VARIANT;
         return 1;
     }
+    size_t operator()(const VariantField& x) {
+        typed_field_info =
+                FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()};
+        return 1;
+    }
     template <typename T>
     size_t operator()(const T&) {
         type = TypeId<NearestFieldType<T>>::value;
         return 1;
     }
-    void get_scalar_type(TypeIndex* data_type) const { *data_type = type; }
+    void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) const {
+        if (typed_field_info.has_value()) {
+            *data_type = typed_field_info->scalar_type_id;
+            *precision = typed_field_info->precision;
+            *scale = typed_field_info->scale;
+            return;
+        }
+        *data_type = type;
+    }
     bool contain_nulls() const { return have_nulls; }
 
     bool need_convert_field() const { return false; }
 
 private:
+    // initialized when operator()(const VariantField& x)
+    std::optional<FieldInfo> typed_field_info;
     TypeIndex type = TypeIndex::Nothing;
     bool have_nulls = false;
 };
@@ -271,6 +287,15 @@ public:
         type_indexes.insert(TypeIndex::VARIANT);
         return 0;
     }
+    size_t operator()(const VariantField& x) {
+        if (x.get_type_id() == TypeIndex::Array) {
+            apply_visitor(*this, x.get_field());
+        } else {
+            typed_field_info =
+                    FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()};
+        }
+        return 0;
+    }
     size_t operator()(const Null&) {
         have_nulls = true;
         return 0;
@@ -282,7 +307,14 @@ public:
         type_indexes.insert(TypeId<NearestFieldType<T>>::value);
         return 0;
     }
-    void get_scalar_type(TypeIndex* type) const {
+    void get_scalar_type(TypeIndex* type, int* precision, int* scale) const {
+        if (typed_field_info.has_value()) {
+            // fast path
+            *type = typed_field_info->scalar_type_id;
+            *precision = typed_field_info->precision;
+            *scale = typed_field_info->scale;
+            return;
+        }
         DataTypePtr data_type;
         get_least_supertype_jsonb(type_indexes, &data_type);
         *type = data_type->get_type_id();
@@ -291,6 +323,8 @@ public:
     bool need_convert_field() const { return field_types.size() > 1; }
 
 private:
+    // initialized when operator()(const VariantField& x)
+    std::optional<FieldInfo> typed_field_info;
     phmap::flat_hash_set<TypeIndex> type_indexes;
     phmap::flat_hash_set<FieldType> field_types;
     bool have_nulls = false;
@@ -334,49 +368,28 @@ void get_field_info_impl(const Field& field, FieldInfo* info) {
     Visitor to_scalar_type_visitor;
     apply_visitor(to_scalar_type_visitor, field);
     TypeIndex type_id;
-    to_scalar_type_visitor.get_scalar_type(&type_id);
+    int precision = 0;
+    int scale = 0;
+    to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale);
     // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
     *info = {
             type_id,
             to_scalar_type_visitor.contain_nulls(),
             to_scalar_type_visitor.need_convert_field(),
             apply_visitor(FieldVisitorToNumberOfDimensions(), field),
+            scale,
+            precision,
     };
 }
 
-void get_base_field_info(const Field& field, FieldInfo* info) {
-    const auto& variant_field = field.get<const VariantField&>();
-    const auto& wrapped_field = variant_field.get_field();
-    if (variant_field.get_type_id() == TypeIndex::Array) {
-        if (wrapped_field.safe_get<Array>().empty()) {
-            info->scalar_type_id = TypeIndex::Nothing;
-            ++info->num_dimensions;
-            info->have_nulls = true;
-            info->need_convert = false;
-        } else {
-            ++info->num_dimensions;
-            get_base_field_info(wrapped_field.safe_get<Array>()[0], info);
-        }
-        return;
-    }
-
-    // handle scalar types
-    info->scalar_type_id = variant_field.get_type_id();
-    info->have_nulls = true;
-    info->need_convert = false;
-    info->scale = variant_field.get_scale();
-    info->precision = variant_field.get_precision();
+bool is_complex_field(const Field& field) {
+    return field.is_complex_field() ||
+           (field.is_variant_field() &&
+            field.get<const VariantField&>().get_field().is_complex_field());
 }
 
 void get_field_info(const Field& field, FieldInfo* info) {
-    if (field.is_variant_field()) {
-        // Currently we support specify predefined schema for other types include decimal, datetime ...etc
-        // so we should set specified info to create correct types, and those predefined types are static and
-        // type no need to deduce
-        get_base_field_info(field, info);
-        return;
-    }
-    if (field.is_complex_field()) {
+    if (is_complex_field(field)) {
         get_field_info_impl<FieldVisitorToScalarType>(field, info);
     } else {
         get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
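
For nested values the dimension handling stays visitor based as well: FieldVisitorToNumberOfDimensions and FieldVisitorToScalarType unwrap a VariantField and recurse into the field it holds (and, for arrays, into the first element), so typed arrays still report the right depth and element type. A rough, self-contained sketch of that recursion with stand-in types (Value, Array and Wrapped are illustrative only):

    #include <cstddef>
    #include <memory>
    #include <variant>
    #include <vector>

    // Scalars contribute 0 dimensions, each array level adds 1 plus the depth
    // of its first element, and a wrapper (analogous to VariantField) simply
    // forwards to the field it holds.
    struct Value;
    using Array = std::vector<Value>;
    struct Wrapped { std::shared_ptr<Value> inner; }; // wrapper around another field
    struct Value { std::variant<int, Array, Wrapped> v; };

    struct NumberOfDimensions {
        size_t operator()(int) const { return 0; }
        size_t operator()(const Array& a) const {
            return a.empty() ? 1 : 1 + std::visit(*this, a.front().v);
        }
        size_t operator()(const Wrapped& w) const { // unwrap and recurse
            return w.inner ? std::visit(*this, w.inner->v) : 0;
        }
    };

    size_t dimensions(const Value& v) { return std::visit(NumberOfDimensions{}, v.v); }
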
diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h
index eefa424c641..e5468867b07 100644
--- a/be/src/vec/common/field_visitors.h
+++ b/be/src/vec/common/field_visitors.h
@@ -25,6 +25,7 @@
 #include "vec/common/demangle.h"
 #include "vec/core/accurate_comparison.h"
 #include "vec/core/field.h"
+#include "vec/core/types.h"
 
 namespace doris::vectorized {
 
@@ -71,6 +72,12 @@ typename std::decay_t<Visitor>::ResultType apply_visitor(Visitor&& visitor, F&&
         return visitor(field.template get<DecimalField<Decimal256>>());
     case Field::Types::JSONB:
         return visitor(field.template get<JsonbField>());
+    case Field::Types::Variant:
+        return visitor(field.template get<VariantField>());
+    case Field::Types::IPv6:
+        return visitor(field.template get<IPv6>());
+    case Field::Types::Int256:
+        return visitor(field.template get<Int128>());
     default:
         throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Bad type of Field {}",
                                static_cast<int>(field.get_type()));
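
apply_visitor itself is a hand-written switch over Field::Types, so a visitor overload is only reachable if the corresponding case exists; the new Variant, IPv6 and Int256 cases are what route those fields to the overloads added above instead of the throwing default branch. The same dispatch shape, sketched with a made-up MiniField type (not the Doris Field class):

    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>
    #include <string>

    // A type tag plus a switch that forwards the stored value to the matching
    // visitor overload; every alternative needs its own case, and anything
    // without one falls into the throwing default branch.
    struct MiniField {
        enum class Type { Null, Int64, String };
        Type type = Type::Null;
        std::int64_t i = 0;
        std::string s;
    };

    template <typename Visitor>
    auto apply_visitor_sketch(Visitor&& visitor, const MiniField& field) {
        switch (field.type) {
        case MiniField::Type::Null:
            return visitor(nullptr);
        case MiniField::Type::Int64:
            return visitor(field.i);
        case MiniField::Type::String:
            return visitor(field.s);
        default:
            throw std::runtime_error("bad type of MiniField");
        }
    }

    // Example visitor in the style of FieldVisitorToStringSimple;
    // apply_visitor_sketch(ToDebugString{}, f) picks the overload for f's tag.
    struct ToDebugString {
        std::string operator()(std::nullptr_t) const { return "NULL"; }
        std::string operator()(std::int64_t x) const { return std::to_string(x); }
        std::string operator()(const std::string& x) const { return x; }
    };
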
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index feadabc46da..17430380310 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -643,7 +643,8 @@ TabletColumn create_sparse_column(const TabletColumn& variant) {
     res.set_aggregation_method(variant.aggregation());
     res.set_path_info(PathInData {variant.name_lower_case() + "." + SPARSE_COLUMN_PATH});
     res.set_parent_unique_id(variant.unique_id());
-
+    // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults
+    res.set_default_value("NULL");
     TabletColumn child_tcolumn;
     child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
     res.add_sub_column(child_tcolumn);
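
Why the sparse column now gets an explicit default: with "NULL" set, segments that were written before the sparse column existed are read through a DefaultValueColumnIterator that fills the column with nulls (insert_many_defaults), which is presumably what lets compaction over older rowsets materialize the column consistently; it is also why the DCHECK(_is_nullable) in column_reader.cpp above is dropped. A heavily simplified view of the branch that behaviour leans on (member names shortened, not the real class):

    #include <string>

    // The literal string "NULL" means "the default value is null"; the
    // nullability DCHECK is gone because the sparse column created above sets
    // this default without being declared nullable.
    struct DefaultValueIteratorState {
        bool has_default_value = false;
        std::string default_value;
        bool is_default_value_null = false;
    };

    void init_default(DefaultValueIteratorState& s) {
        if (s.has_default_value && s.default_value == "NULL") {
            s.is_default_value_null = true; // missing column reads back as null defaults
        }
    }
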
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp
index 55613e21921..2646d0e831d 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -44,6 +44,8 @@
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_nullable.h"
+#include "vec/io/io_helper.h"
+#include "vec/runtime/ipv6_value.h"
 
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
@@ -61,7 +63,12 @@ class FieldVisitorToStringSimple : public StaticVisitor<String> {
 public:
     String operator()(const Null& x) const { return "NULL"; }
     String operator()(const UInt64& x) const { return std::to_string(x); }
+    String operator()(const IPv6& x) const {
+        auto value = IPv6Value(x);
+        return value.to_string();
+    }
     String operator()(const Int64& x) const { return std::to_string(x); }
+    String operator()(const Int128& x) const { return int128_to_string(x); }
     String operator()(const Float64& x) const { return std::to_string(x); }
     String operator()(const String& x) const { return x; }
     [[noreturn]] String operator()(const UInt128& x) const {
@@ -73,6 +80,9 @@ public:
     [[noreturn]] String operator()(const Tuple& x) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
     }
+    [[noreturn]] String operator()(const VariantField& x) const {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
+    }
     [[noreturn]] String operator()(const DecimalField<Decimal32>& x) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
     }
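
FieldVisitorToStringSimple also learns to print IPv6 and Int128 values (via IPv6Value::to_string and int128_to_string), so predefined ipv6/largeint fields can be converted. For orientation only, a rough sketch of what rendering a 16-byte IPv6 address as text involves; how Doris lays out the bytes inside its 128-bit IPv6 value is an assumption not covered here:

    #include <arpa/inet.h>
    #include <netinet/in.h>

    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <string>

    // Copy the 16 raw bytes into an in6_addr and let POSIX inet_ntop produce
    // the usual colon-separated text form.
    std::string ipv6_bytes_to_string(const std::array<std::uint8_t, 16>& bytes) {
        in6_addr addr {};
        std::memcpy(&addr, bytes.data(), sizeof(addr));
        char buf[INET6_ADDRSTRLEN];
        if (inet_ntop(AF_INET6, &addr, buf, sizeof(buf)) == nullptr) {
            return {};
        }
        return buf;
    }
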
diff --git a/regression-test/data/variant_p0/desc.out b/regression-test/data/variant_p0/desc.out
index ffd87b9d05b..b6afa3f2101 100644
Binary files a/regression-test/data/variant_p0/desc.out and b/regression-test/data/variant_p0/desc.out differ
diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out
index 06e06e16c58..ca4161c3a01 100644
Binary files a/regression-test/data/variant_p0/predefine/load.out and b/regression-test/data/variant_p0/predefine/load.out differ
diff --git a/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out b/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out
new file mode 100644
index 00000000000..33fb39ee1d0
Binary files /dev/null and b/regression-test/data/variant_p0/schema_change/test_alter_add_drop_column.out differ
diff --git a/regression-test/suites/variant_p0/desc.groovy b/regression-test/suites/variant_p0/desc.groovy
index 90ca4595fec..57ec6f360f3 100644
--- a/regression-test/suites/variant_p0/desc.groovy
+++ b/regression-test/suites/variant_p0/desc.groovy
@@ -57,7 +57,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){
             )
             DUPLICATE KEY(`k`)
             DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
-            properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0");
         """
     }
 
@@ -76,7 +76,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){
                 PARTITION p3 VALUES LESS THAN (100000)
             )
             DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
-            properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0");
         """
     }
 
@@ -185,7 +185,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){
             )
             DUPLICATE KEY(`k`)
             DISTRIBUTED BY HASH(k) BUCKETS 5
-            properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0");
         """
         sql """ insert into ${table_name} values (0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}')"""
          sql "select * from ${table_name} limit 1"
@@ -228,7 +228,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){
             )
             DUPLICATE KEY(`k`)
             DISTRIBUTED BY HASH(k) BUCKETS 5
-            properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0");
         """
         sql """ insert into ${table_name} values (0, '{"名字" : "jack", "!@#^&*()": "11111", "金额" : 200, "画像" : {"地址" : "北京", "\\\u4E2C\\\u6587": "unicode"}}')"""
         sql """set describe_extend_variant_column = true"""
@@ -244,7 +244,7 @@ suite("regression_test_variant_desc", "nonConcurrent"){
             )
             DUPLICATE KEY(`k`)
             DISTRIBUTED BY HASH(k) BUCKETS 5
-            properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_max_subcolumns_count" = "0");
         """
         sql """ insert into ${table_name} values (0, '{}')"""
         sql """ insert into ${table_name} values (0, '100')"""
diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy
index 1093ee3a25f..7ccb8068024 100644
--- a/regression-test/suites/variant_p0/predefine/load.groovy
+++ b/regression-test/suites/variant_p0/predefine/load.groovy
@@ -292,4 +292,31 @@ suite("regression_test_variant_predefine_schema", "p0"){
     "variant_max_subcolumns_count" = "0"
     );
     """
+
+    // array with nulls
+
+    sql "DROP TABLE IF EXISTS test_array_with_nulls"
+    // test bf with bool
+    sql """
+        CREATE TABLE `test_array_with_nulls` (
+      `k` bigint NULL,
+      `var` variant<array_decimal:array<decimalv3(27,9)>>
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "min_load_replica_num" = "-1",
+    "variant_max_subcolumns_count" = "0"
+    );
+    """
+    sql """insert into test_array_with_nulls values(3, '{"array_decimal" : [null, 2.2, 3.3, 4.4]}')"""
+    qt_sql_arr_null_1 "select * from test_array_with_nulls order by k"
+    sql """insert into test_array_with_nulls values(1, '{"array_decimal" : [1.1, 2.2, 3.3, null]}')"""
+    sql """insert into test_array_with_nulls values(2, '{"array_decimal" : [1.1, 2.2, null, 4.4]}')"""
+    sql """insert into test_array_with_nulls values(4, '{"array_decimal" : [1.1, null, 3.3, 4.4]}')"""
+    sql """insert into test_array_with_nulls values(5, '{"array_decimal" : [1.1, 2.2, 3.3, 4.4]}')"""
+    sql """insert into test_array_with_nulls values(6, '{"array_decimal" : []}')"""
+    sql """insert into test_array_with_nulls values(7, '{"array_decimal" : [null, null]}')"""
+    qt_sql_arr_null_2 "select * from test_array_with_nulls order by k"
 }
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy b/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy
index ce55a62af65..df2b3c46a60 100644
--- a/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy
+++ b/regression-test/suites/variant_p0/schema_change/test_alter_add_drop_column.groovy
@@ -25,7 +25,7 @@ suite("regression_test_variant_add_drop_column", "variant_type"){
         )
         DUPLICATE KEY(`k`)
         DISTRIBUTED BY HASH(k) BUCKETS 1
-        properties("replication_num" = "1");
+        properties("replication_num" = "1", "disable_auto_compaction" = "true");
     """
     sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')"""
 
@@ -34,20 +34,28 @@ suite("regression_test_variant_add_drop_column", "variant_type"){
     sql "alter table variant_add_drop_column add column t2 datetime default null"
     sql """insert into variant_add_drop_column values (1, '{"a" : 12345234567,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01")"""
     sql "alter table variant_add_drop_column add column i1 int default null"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01", 12345)"""
+    sql """insert into variant_add_drop_column values (2, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", "2021-01-01 01:01:01", 12345)"""
     sql "alter table variant_add_drop_column drop column t1"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", 12345)"""
+    sql """insert into variant_add_drop_column values (3, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', "2021-01-01 01:01:01", 12345)"""
     sql "alter table variant_add_drop_column drop column t2"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', 12345)"""
+    sql """insert into variant_add_drop_column values (4, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}', 12345)"""
     sql "alter table variant_add_drop_column drop column i1"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}')"""
+    sql """insert into variant_add_drop_column values (5, '{"a" : 12345,"b" : 2}', '{"xxx" : 1}')"""
     sql "alter table variant_add_drop_column drop column v"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')"""
+    sql """insert into variant_add_drop_column values (6, '{"a" : 12345,"b" : 2}')"""
     sql "alter table variant_add_drop_column add column v variant default null"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')"""
+    sql """insert into variant_add_drop_column values (7, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')"""
     sql "alter table variant_add_drop_column add column v3 variant default null"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')"""
+    sql """insert into variant_add_drop_column values (8, '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}', '{"a" : 12345,"b" : 2}')"""
     sql "alter table variant_add_drop_column drop column v"
     sql "alter table variant_add_drop_column drop column v2"
-    sql """insert into variant_add_drop_column values (1, '{"a" : 12345,"b" : 2}')"""
+    sql """insert into variant_add_drop_column values (9, '{"a" : 12345,"b" : 2}')"""
+
+    // trigger compactions for all tablets in ${tableName}
+    def tablets = sql_return_maparray """ show tablets from ${table_name}; """
+
+    // trigger compactions for all tablets in ${tableName}
+    trigger_and_wait_compaction(table_name, "cumulative")
+
+    qt_sql "select * from variant_add_drop_column order by k limit 10"
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
