This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new f7c19d49806 [feature](function) implement variant_type to get inner 
types in variant columns (#49909)
f7c19d49806 is described below

commit f7c19d49806b63aa2b27168e054afa2d8523973d
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Thu Apr 10 10:49:24 2025 +0800

    [feature](function) implement variant_type to get inner types in variant 
columns (#49909)
---
 be/src/vec/columns/column_object.cpp               | 290 +++------------------
 be/src/vec/columns/column_object.h                 |   1 -
 be/src/vec/common/schema_util.cpp                  | 226 ++++++++++++++++
 be/src/vec/common/schema_util.h                    |   2 +
 be/src/vec/data_types/data_type_jsonb.h            |   4 +-
 be/src/vec/functions/function_variant_type.cpp     | 105 ++++++++
 be/src/vec/functions/simple_function_factory.h     |   2 +
 be/src/vec/json/parse2column.cpp                   |   2 +-
 .../doris/catalog/BuiltinScalarFunctions.java      |   4 +-
 .../functions/scalar/GetVariantType.java           |  69 +++++
 .../expressions/visitor/ScalarFunctionVisitor.java |   5 +
 regression-test/data/variant_p0/predefine/load.out | Bin 7454 -> 7526 bytes
 .../suites/variant_p0/predefine/load.groovy        |  22 +-
 13 files changed, 466 insertions(+), 266 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 18ff9033cf9..1b607a7e87e 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -137,265 +137,8 @@ size_t get_number_of_dimensions(const IDataType& type) {
     }
     return num_dimensions;
 }
-
-/// Calculates number of dimensions in array field.
-/// Returns 0 for scalar fields.
-class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
-public:
-    size_t operator()(const Array& x) const {
-        const size_t size = x.size();
-        size_t dimensions = 0;
-        for (size_t i = 0; i < size; ++i) {
-            size_t element_dimensions = apply_visitor(*this, x[i]);
-            dimensions = std::max(dimensions, element_dimensions);
-        }
-        return 1 + dimensions;
-    }
-    size_t operator()(const VariantField& x) { return apply_visitor(*this, 
x.get_field()); }
-    template <typename T>
-    size_t operator()(const T&) const {
-        return 0;
-    }
-};
-
-// Visitor that allows to get type of scalar field
-// but exclude fields contain complex field.This is a faster version
-// for FieldVisitorToScalarType which does not support complex field.
-class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
-public:
-    size_t operator()(const Array& x) {
-        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not 
supported");
-    }
-    size_t operator()(const UInt64& x) {
-        if (x <= std::numeric_limits<Int8>::max()) {
-            type = TypeIndex::Int8;
-        } else if (x <= std::numeric_limits<Int16>::max()) {
-            type = TypeIndex::Int16;
-        } else if (x <= std::numeric_limits<Int32>::max()) {
-            type = TypeIndex::Int32;
-        } else {
-            type = TypeIndex::Int64;
-        }
-        return 1;
-    }
-    size_t operator()(const Int64& x) {
-        if (x <= std::numeric_limits<Int8>::max() && x >= 
std::numeric_limits<Int8>::min()) {
-            type = TypeIndex::Int8;
-        } else if (x <= std::numeric_limits<Int16>::max() &&
-                   x >= std::numeric_limits<Int16>::min()) {
-            type = TypeIndex::Int16;
-        } else if (x <= std::numeric_limits<Int32>::max() &&
-                   x >= std::numeric_limits<Int32>::min()) {
-            type = TypeIndex::Int32;
-        } else {
-            type = TypeIndex::Int64;
-        }
-        return 1;
-    }
-    size_t operator()(const JsonbField& x) {
-        type = TypeIndex::JSONB;
-        return 1;
-    }
-    size_t operator()(const Null&) {
-        have_nulls = true;
-        return 1;
-    }
-    size_t operator()(const VariantMap&) {
-        type = TypeIndex::VARIANT;
-        return 1;
-    }
-    size_t operator()(const VariantField& x) {
-        typed_field_info =
-                FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), 
x.get_precision()};
-        return 1;
-    }
-    template <typename T>
-    size_t operator()(const T&) {
-        type = TypeId<NearestFieldType<T>>::value;
-        return 1;
-    }
-    void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) 
const {
-        if (typed_field_info.has_value()) {
-            *data_type = typed_field_info->scalar_type_id;
-            *precision = typed_field_info->precision;
-            *scale = typed_field_info->scale;
-            return;
-        }
-        *data_type = type;
-    }
-    bool contain_nulls() const { return have_nulls; }
-
-    bool need_convert_field() const { return false; }
-
-private:
-    // initialized when operator()(const VariantField& x)
-    std::optional<FieldInfo> typed_field_info;
-    TypeIndex type = TypeIndex::Nothing;
-    bool have_nulls = false;
-};
-
-/// Visitor that allows to get type of scalar field
-/// or least common type of scalars in array.
-/// More optimized version of FieldToDataType.
-class FieldVisitorToScalarType : public StaticVisitor<size_t> {
-public:
-    using FieldType = Field::Types::Which;
-    size_t operator()(const Array& x) {
-        size_t size = x.size();
-        for (size_t i = 0; i < size; ++i) {
-            apply_visitor(*this, x[i]);
-        }
-        return 0;
-    }
-    // TODO doris not support unsigned integers for now
-    // treat as signed integers
-    size_t operator()(const UInt64& x) {
-        field_types.insert(FieldType::UInt64);
-        if (x <= std::numeric_limits<Int8>::max()) {
-            type_indexes.insert(TypeIndex::Int8);
-        } else if (x <= std::numeric_limits<Int16>::max()) {
-            type_indexes.insert(TypeIndex::Int16);
-        } else if (x <= std::numeric_limits<Int32>::max()) {
-            type_indexes.insert(TypeIndex::Int32);
-        } else {
-            type_indexes.insert(TypeIndex::Int64);
-        }
-        return 0;
-    }
-    size_t operator()(const Int64& x) {
-        field_types.insert(FieldType::Int64);
-        if (x <= std::numeric_limits<Int8>::max() && x >= 
std::numeric_limits<Int8>::min()) {
-            type_indexes.insert(TypeIndex::Int8);
-        } else if (x <= std::numeric_limits<Int16>::max() &&
-                   x >= std::numeric_limits<Int16>::min()) {
-            type_indexes.insert(TypeIndex::Int16);
-        } else if (x <= std::numeric_limits<Int32>::max() &&
-                   x >= std::numeric_limits<Int32>::min()) {
-            type_indexes.insert(TypeIndex::Int32);
-        } else {
-            type_indexes.insert(TypeIndex::Int64);
-        }
-        return 0;
-    }
-    size_t operator()(const JsonbField& x) {
-        field_types.insert(FieldType::JSONB);
-        type_indexes.insert(TypeIndex::JSONB);
-        return 0;
-    }
-    size_t operator()(const VariantMap&) {
-        field_types.insert(FieldType::VariantMap);
-        type_indexes.insert(TypeIndex::VARIANT);
-        return 0;
-    }
-    size_t operator()(const VariantField& x) {
-        if (x.get_type_id() == TypeIndex::Array) {
-            apply_visitor(*this, x.get_field());
-        } else {
-            typed_field_info =
-                    FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), 
x.get_precision()};
-        }
-        return 0;
-    }
-    size_t operator()(const Null&) {
-        have_nulls = true;
-        return 0;
-    }
-    template <typename T>
-    size_t operator()(const T&) {
-        Field::EnumToType<Field::Types::Array>::Type a;
-        field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
-        type_indexes.insert(TypeId<NearestFieldType<T>>::value);
-        return 0;
-    }
-    void get_scalar_type(TypeIndex* type, int* precision, int* scale) const {
-        if (typed_field_info.has_value()) {
-            // fast path
-            *type = typed_field_info->scalar_type_id;
-            *precision = typed_field_info->precision;
-            *scale = typed_field_info->scale;
-            return;
-        }
-        DataTypePtr data_type;
-        get_least_supertype_jsonb(type_indexes, &data_type);
-        *type = data_type->get_type_id();
-    }
-    bool contain_nulls() const { return have_nulls; }
-    bool need_convert_field() const { return field_types.size() > 1; }
-
-private:
-    // initialized when operator()(const VariantField& x)
-    std::optional<FieldInfo> typed_field_info;
-    phmap::flat_hash_set<TypeIndex> type_indexes;
-    phmap::flat_hash_set<FieldType> field_types;
-    bool have_nulls = false;
-};
-
-/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
-/// and replaces all scalars or nested arrays to @replacement at that level.
-class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
-public:
-    FieldVisitorReplaceScalars(const Field& replacement_, size_t 
num_dimensions_to_keep_)
-            : replacement(replacement_), 
num_dimensions_to_keep(num_dimensions_to_keep_) {}
-
-    Field operator()(const Array& x) const {
-        if (num_dimensions_to_keep == 0) {
-            return replacement;
-        }
-
-        const size_t size = x.size();
-        Array res(size);
-        for (size_t i = 0; i < size; ++i) {
-            res[i] = apply_visitor(
-                    FieldVisitorReplaceScalars(replacement, 
num_dimensions_to_keep - 1), x[i]);
-        }
-        return res;
-    }
-
-    template <typename T>
-    Field operator()(const T&) const {
-        return replacement;
-    }
-
-private:
-    const Field& replacement;
-    size_t num_dimensions_to_keep;
-};
-
 } // namespace
 
-template <typename Visitor>
-void get_field_info_impl(const Field& field, FieldInfo* info) {
-    Visitor to_scalar_type_visitor;
-    apply_visitor(to_scalar_type_visitor, field);
-    TypeIndex type_id;
-    int precision = 0;
-    int scale = 0;
-    to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale);
-    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
-    *info = {
-            type_id,
-            to_scalar_type_visitor.contain_nulls(),
-            to_scalar_type_visitor.need_convert_field(),
-            apply_visitor(FieldVisitorToNumberOfDimensions(), field),
-            scale,
-            precision,
-    };
-}
-
-bool is_complex_field(const Field& field) {
-    return field.is_complex_field() ||
-           (field.is_variant_field() &&
-            field.get<const VariantField&>().get_field().is_complex_field());
-}
-
-void get_field_info(const Field& field, FieldInfo* info) {
-    if (is_complex_field(field)) {
-        get_field_info_impl<FieldVisitorToScalarType>(field, info);
-    } else {
-        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
-    }
-}
-
 #ifdef NDEBUG
 #define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
 #else
@@ -472,7 +215,7 @@ Field get_field_from_variant_field(const Field& field) {
 
 void ColumnObject::Subcolumn::insert(Field field) {
     FieldInfo info;
-    get_field_info(field, &info);
+    schema_util::get_field_info(field, &info);
     field = get_field_from_variant_field(field);
     insert(std::move(field), std::move(info));
 }
@@ -2481,6 +2224,37 @@ bool 
ColumnObject::try_insert_many_defaults_from_nested(const Subcolumns::NodePt
     return true;
 }
 
+/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
+/// and replaces all scalars or nested arrays with @replacement at that level.
+class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
+public:
+    FieldVisitorReplaceScalars(const Field& replacement_, size_t 
num_dimensions_to_keep_)
+            : replacement(replacement_), 
num_dimensions_to_keep(num_dimensions_to_keep_) {}
+
+    Field operator()(const Array& x) const {
+        if (num_dimensions_to_keep == 0) {
+            return replacement;
+        }
+
+        const size_t size = x.size();
+        Array res(size);
+        for (size_t i = 0; i < size; ++i) {
+            res[i] = apply_visitor(
+                    FieldVisitorReplaceScalars(replacement, 
num_dimensions_to_keep - 1), x[i]);
+        }
+        return res;
+    }
+
+    template <typename T>
+    Field operator()(const T&) const {
+        return replacement;
+    }
+
+private:
+    const Field& replacement;
+    size_t num_dimensions_to_keep;
+};
+
 bool ColumnObject::try_insert_default_from_nested(const Subcolumns::NodePtr& 
entry) const {
     const auto* leaf = get_leaf_of_the_same_nested(entry);
     if (!leaf) {
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index ad8a693e736..8a76151bba6 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -84,7 +84,6 @@ struct FieldInfo {
     int precision = 0;
 };
 
-void get_field_info(const Field& field, FieldInfo* info);
 /** A column that represents object with dynamic set of subcolumns.
  *  Subcolumns are identified by paths in document and are stored in
  *  a trie-like structure. ColumnObject is not suitable for writing into tables
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 17430380310..12049e20358 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -64,6 +64,7 @@
 #include "vec/columns/column_object.h"
 #include "vec/columns/columns_number.h"
 #include "vec/common/assert_cast.h"
+#include "vec/common/field_visitors.h"
 #include "vec/common/typeid_cast.h"
 #include "vec/core/block.h"
 #include "vec/core/column_numbers.h"
@@ -904,5 +905,230 @@ void calculate_variant_stats(const IColumn& 
encoded_sparse_column,
     }
 }
 
+/// Calculates number of dimensions in array field.
+/// Returns 0 for scalar fields.
+class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
+public:
+    size_t operator()(const Array& x) const {
+        const size_t size = x.size();
+        size_t dimensions = 0;
+        for (size_t i = 0; i < size; ++i) {
+            size_t element_dimensions = apply_visitor(*this, x[i]);
+            dimensions = std::max(dimensions, element_dimensions);
+        }
+        return 1 + dimensions;
+    }
+    size_t operator()(const VariantField& x) { return apply_visitor(*this, 
x.get_field()); }
+    template <typename T>
+    size_t operator()(const T&) const {
+        return 0;
+    }
+};
+
+// Visitor that gets the type of a scalar field. It does not support
+// complex fields; it is a faster alternative to FieldVisitorToScalarType
+// for fields that are not complex.
+class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
+public:
+    size_t operator()(const Array& x) {
+        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not 
supported");
+    }
+    size_t operator()(const UInt64& x) {
+        if (x <= std::numeric_limits<Int8>::max()) {
+            type = TypeIndex::Int8;
+        } else if (x <= std::numeric_limits<Int16>::max()) {
+            type = TypeIndex::Int16;
+        } else if (x <= std::numeric_limits<Int32>::max()) {
+            type = TypeIndex::Int32;
+        } else {
+            type = TypeIndex::Int64;
+        }
+        return 1;
+    }
+    size_t operator()(const Int64& x) {
+        if (x <= std::numeric_limits<Int8>::max() && x >= 
std::numeric_limits<Int8>::min()) {
+            type = TypeIndex::Int8;
+        } else if (x <= std::numeric_limits<Int16>::max() &&
+                   x >= std::numeric_limits<Int16>::min()) {
+            type = TypeIndex::Int16;
+        } else if (x <= std::numeric_limits<Int32>::max() &&
+                   x >= std::numeric_limits<Int32>::min()) {
+            type = TypeIndex::Int32;
+        } else {
+            type = TypeIndex::Int64;
+        }
+        return 1;
+    }
+    size_t operator()(const JsonbField& x) {
+        type = TypeIndex::JSONB;
+        return 1;
+    }
+    size_t operator()(const Null&) {
+        have_nulls = true;
+        return 1;
+    }
+    size_t operator()(const VariantMap&) {
+        type = TypeIndex::VARIANT;
+        return 1;
+    }
+    size_t operator()(const VariantField& x) {
+        typed_field_info =
+                FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), 
x.get_precision()};
+        return 1;
+    }
+    template <typename T>
+    size_t operator()(const T&) {
+        type = TypeId<NearestFieldType<T>>::value;
+        return 1;
+    }
+    void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) 
const {
+        if (typed_field_info.has_value()) {
+            *data_type = typed_field_info->scalar_type_id;
+            *precision = typed_field_info->precision;
+            *scale = typed_field_info->scale;
+            return;
+        }
+        *data_type = type;
+    }
+    bool contain_nulls() const { return have_nulls; }
+
+    bool need_convert_field() const { return false; }
+
+private:
+    // initialized when operator()(const VariantField& x)
+    std::optional<FieldInfo> typed_field_info;
+    TypeIndex type = TypeIndex::Nothing;
+    bool have_nulls = false;
+};
+
+/// Visitor that allows to get type of scalar field
+/// or least common type of scalars in array.
+/// More optimized version of FieldToDataType.
+class FieldVisitorToScalarType : public StaticVisitor<size_t> {
+public:
+    using FieldType = Field::Types::Which;
+    size_t operator()(const Array& x) {
+        size_t size = x.size();
+        for (size_t i = 0; i < size; ++i) {
+            apply_visitor(*this, x[i]);
+        }
+        return 0;
+    }
+    // TODO: Doris does not support unsigned integers for now,
+    // so treat them as signed integers.
+    size_t operator()(const UInt64& x) {
+        field_types.insert(FieldType::UInt64);
+        if (x <= std::numeric_limits<Int8>::max()) {
+            type_indexes.insert(TypeIndex::Int8);
+        } else if (x <= std::numeric_limits<Int16>::max()) {
+            type_indexes.insert(TypeIndex::Int16);
+        } else if (x <= std::numeric_limits<Int32>::max()) {
+            type_indexes.insert(TypeIndex::Int32);
+        } else {
+            type_indexes.insert(TypeIndex::Int64);
+        }
+        return 0;
+    }
+    size_t operator()(const Int64& x) {
+        field_types.insert(FieldType::Int64);
+        if (x <= std::numeric_limits<Int8>::max() && x >= 
std::numeric_limits<Int8>::min()) {
+            type_indexes.insert(TypeIndex::Int8);
+        } else if (x <= std::numeric_limits<Int16>::max() &&
+                   x >= std::numeric_limits<Int16>::min()) {
+            type_indexes.insert(TypeIndex::Int16);
+        } else if (x <= std::numeric_limits<Int32>::max() &&
+                   x >= std::numeric_limits<Int32>::min()) {
+            type_indexes.insert(TypeIndex::Int32);
+        } else {
+            type_indexes.insert(TypeIndex::Int64);
+        }
+        return 0;
+    }
+    size_t operator()(const JsonbField& x) {
+        field_types.insert(FieldType::JSONB);
+        type_indexes.insert(TypeIndex::JSONB);
+        return 0;
+    }
+    size_t operator()(const VariantMap&) {
+        field_types.insert(FieldType::VariantMap);
+        type_indexes.insert(TypeIndex::VARIANT);
+        return 0;
+    }
+    size_t operator()(const VariantField& x) {
+        if (x.get_type_id() == TypeIndex::Array) {
+            apply_visitor(*this, x.get_field());
+        } else {
+            typed_field_info =
+                    FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), 
x.get_precision()};
+        }
+        return 0;
+    }
+    size_t operator()(const Null&) {
+        have_nulls = true;
+        return 0;
+    }
+    template <typename T>
+    size_t operator()(const T&) {
+        Field::EnumToType<Field::Types::Array>::Type a;
+        field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
+        type_indexes.insert(TypeId<NearestFieldType<T>>::value);
+        return 0;
+    }
+    void get_scalar_type(TypeIndex* type, int* precision, int* scale) const {
+        if (typed_field_info.has_value()) {
+            // fast path
+            *type = typed_field_info->scalar_type_id;
+            *precision = typed_field_info->precision;
+            *scale = typed_field_info->scale;
+            return;
+        }
+        DataTypePtr data_type;
+        get_least_supertype_jsonb(type_indexes, &data_type);
+        *type = data_type->get_type_id();
+    }
+    bool contain_nulls() const { return have_nulls; }
+    bool need_convert_field() const { return field_types.size() > 1; }
+
+private:
+    // initialized when operator()(const VariantField& x)
+    std::optional<FieldInfo> typed_field_info;
+    phmap::flat_hash_set<TypeIndex> type_indexes;
+    phmap::flat_hash_set<FieldType> field_types;
+    bool have_nulls = false;
+};
+
+template <typename Visitor>
+void get_field_info_impl(const Field& field, FieldInfo* info) {
+    Visitor to_scalar_type_visitor;
+    apply_visitor(to_scalar_type_visitor, field);
+    TypeIndex type_id;
+    int precision = 0;
+    int scale = 0;
+    to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale);
+    // array items' dimensions may mismatch, e.g. [1, 2, [1, 2, 3]]
+    *info = {
+            type_id,
+            to_scalar_type_visitor.contain_nulls(),
+            to_scalar_type_visitor.need_convert_field(),
+            apply_visitor(FieldVisitorToNumberOfDimensions(), field),
+            scale,
+            precision,
+    };
+}
+
+bool is_complex_field(const Field& field) {
+    return field.is_complex_field() ||
+           (field.is_variant_field() &&
+            field.get<const VariantField&>().get_field().is_complex_field());
+}
+
+void get_field_info(const Field& field, FieldInfo* info) {
+    if (is_complex_field(field)) {
+        get_field_info_impl<FieldVisitorToScalarType>(field, info);
+    } else {
+        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
+    }
+}
+
 #include "common/compile_check_end.h"
 } // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 8281cdec7b6..eba6869964e 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -152,4 +152,6 @@ void calculate_variant_stats(const IColumn& 
encoded_sparse_column,
                              segment_v2::VariantStatisticsPB* stats, size_t 
row_pos,
                              size_t num_rows);
 
+void get_field_info(const Field& field, FieldInfo* info);
+
 } // namespace  doris::vectorized::schema_util
diff --git a/be/src/vec/data_types/data_type_jsonb.h 
b/be/src/vec/data_types/data_type_jsonb.h
index 25d90d37482..6fd188574dd 100644
--- a/be/src/vec/data_types/data_type_jsonb.h
+++ b/be/src/vec/data_types/data_type_jsonb.h
@@ -87,9 +87,7 @@ public:
     // Return JsonbField.
     Field get_type_field(const IColumn& column, size_t row) const override {
         const auto& column_data = static_cast<const ColumnString&>(column);
-        Field field =
-                JsonbField(column_data.get_data_at(row).data, 
column_data.get_data_at(row).size);
-        return VariantField(std::move(field), TypeIndex::JSONB);
+        return JsonbField(column_data.get_data_at(row).data, 
column_data.get_data_at(row).size);
     }
 
     bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/functions/function_variant_type.cpp 
b/be/src/vec/functions/function_variant_type.cpp
new file mode 100644
index 00000000000..8e541a6958f
--- /dev/null
+++ b/be/src/vec/functions/function_variant_type.cpp
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <glog/logging.h>
+
+#include "vec/columns/column_object.h"
+#include "vec/common/schema_util.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+class FunctionContext;
+} // namespace doris
+
+namespace doris::vectorized {
+
+// Returns, for each row of a variant column, a JSON object mapping each key to its type name.
+class FunctionVariantType : public IFunction {
+public:
+    static constexpr auto name = "variant_type";
+    static FunctionPtr create() { return 
std::make_shared<FunctionVariantType>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    std::map<std::string, std::string> get_type_info(const ColumnObject& 
column, size_t row) const {
+        std::map<std::string, std::string> result;
+        Field field = column[row];
+        const auto& variant_map = field.get<const VariantMap&>();
+        for (const auto& [key, value] : variant_map) {
+            if (key.empty() && value.get_type() == Field::Types::JSONB &&
+                value.get<const JsonbField&>().get_size() == 0) {
+                // skip the root (empty key) when it holds an empty JSONB value
+                continue;
+            }
+            FieldInfo info;
+            schema_util::get_field_info(value, &info);
+            result[key] = getTypeName(info.scalar_type_id);
+        }
+        return result;
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const 
override {
+        const auto& arg_column =
+                assert_cast<const 
ColumnObject&>(*block.get_by_position(arguments[0]).column);
+        auto result_column = ColumnString::create();
+        auto arg_real_type = arg_column.get_root_type();
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            const Field& variant_map = arg_column[i];
+            auto type_info = get_type_info(arg_column, i);
+
+            // Use ColumnString as buffer for JSON serialization
+            VectorBufferWriter writer(*result_column.get());
+
+            // Write JSON object
+            writeChar('{', writer);
+
+            bool first = true;
+            for (const auto& [key, value] : type_info) {
+                if (!first) {
+                    writeChar(',', writer);
+                }
+                first = false;
+
+                // Write key
+                writeJSONString(key, writer);
+                writeCString(":", writer);
+
+                // Write value
+                writeJSONString(value, writer);
+            }
+
+            writeChar('}', writer);
+            writer.commit();
+        }
+
+        block.replace_by_position(result, std::move(result_column));
+        return Status::OK();
+    }
+};
+
+void register_function_variant_type(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionVariantType>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/simple_function_factory.h 
b/be/src/vec/functions/simple_function_factory.h
index 46eca0cb419..2e4e54d49c2 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -112,6 +112,7 @@ void 
register_function_split_by_regexp(SimpleFunctionFactory& factory);
 void register_function_assert_true(SimpleFunctionFactory& factory);
 void register_function_compress(SimpleFunctionFactory& factory);
 void register_function_bit_test(SimpleFunctionFactory& factory);
+void register_function_variant_type(SimpleFunctionFactory& factory);
 
 class SimpleFunctionFactory {
     using Creator = std::function<FunctionBuilderPtr()>;
@@ -303,6 +304,7 @@ public:
             register_function_assert_true(instance);
             register_function_bit_test(instance);
             register_function_compress(instance);
+            register_function_variant_type(instance);
         });
         return instance;
     }
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 4ef416dc865..3dc281a6fda 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -157,7 +157,7 @@ void parse_json_to_variant(IColumn& column, const char* 
src, size_t length,
     size_t old_num_rows = column_object.rows();
     for (size_t i = 0; i < paths.size(); ++i) {
         FieldInfo field_info;
-        get_field_info(values[i], &field_info);
+        schema_util::get_field_info(values[i], &field_info);
         if (WhichDataType(field_info.scalar_type_id).is_nothing()) {
             continue;
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index e482b419ac5..46b79d6f514 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -199,6 +199,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Grouping;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingId;
@@ -982,7 +983,8 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(SessionUser.class, "session_user"),
             scalar(LastQueryId.class, "last_query_id"),
             scalar(Compress.class, "compress"),
-            scalar(Uncompress.class, "uncompress"));
+            scalar(Uncompress.class, "uncompress"),
+            scalar(GetVariantType.class, "variant_type"));
 
     public static final BuiltinScalarFunctions INSTANCE = new 
BuiltinScalarFunctions();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
new file mode 100644
index 00000000000..b3e4c4e6f41
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VariantType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'variant_type'. This class is generated by GenerateFunction.
+ *
+ * The only declared signature takes a single VARIANT argument and returns STRING;
+ * the function is marked AlwaysNullable.
+ *
+ * NOTE(review): this class implements BinaryExpression although the constructor and
+ * withChildren accept exactly one child; UnaryExpression looks like the intended
+ * shape - confirm before relying on left()/right() style accessors.
+ */
+public class GetVariantType extends ScalarFunction
+        implements BinaryExpression, ExplicitlyCastableSignature, 
AlwaysNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(StringType.INSTANCE).args(new VariantType(0))
+    );
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg0 the single child expression; per SIGNATURES it is expected to be a VARIANT
+     */
+    public GetVariantType(Expression arg0) {
+        super("variant_type", arg0);
+    }
+
+    /**
+     * withChildren.
+     *
+     * Builds a new GetVariantType from the given children. Exactly one child is
+     * required; any other count is rejected by Preconditions.checkArgument.
+     */
+    @Override
+    public GetVariantType withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new GetVariantType(children.get(0));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitGetVariantType(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 4346fb71591..b4b16a86c29 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -204,6 +204,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Hex;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.HllCardinality;
@@ -2348,4 +2349,8 @@ public interface ScalarFunctionVisitor<R, C> {
     default R visitUncompress(Uncompress uncompress, C context) {
         return visitScalarFunction(uncompress, context);
     }
+
+    default R visitGetVariantType(GetVariantType getVariantType, C context) {
+        return visitScalarFunction(getVariantType, context); // default: delegate to the generic scalar-function visit
+    }
 }
diff --git a/regression-test/data/variant_p0/predefine/load.out 
b/regression-test/data/variant_p0/predefine/load.out
index e2347eb8508..edc57afcd76 100644
Binary files a/regression-test/data/variant_p0/predefine/load.out and 
b/regression-test/data/variant_p0/predefine/load.out differ
diff --git a/regression-test/suites/variant_p0/predefine/load.groovy 
b/regression-test/suites/variant_p0/predefine/load.groovy
index cf0ad73491f..c83c93b2116 100644
--- a/regression-test/suites/variant_p0/predefine/load.groovy
+++ b/regression-test/suites/variant_p0/predefine/load.groovy
@@ -318,5 +318,23 @@ suite("regression_test_variant_predefine_schema", "p0"){
     sql """insert into test_array_with_nulls values(5, '{"array_decimal" : 
[1.1, 2.2, 3.3, 4.4]}')"""
     sql """insert into test_array_with_nulls values(6, '{"array_decimal" : 
[]}')"""
     sql """insert into test_array_with_nulls values(7, '{"array_decimal" : 
[null, null]}')"""
-    qt_sql_arr_null_2 "select * from test_array_with_nulls order by k"
-}
\ No newline at end of file
+    qt_sql_arr_null_2 "select * from test_array_with_nulls order by k limit 5"
+
+    // test variant_type
+    sql "DROP TABLE IF EXISTS test_variant_type"
+    sql """
+        CREATE TABLE `test_variant_type` (
+      `k` bigint NULL,
+      `var` variant<match_name 'dcm' : decimal, 'db' : double, 'dt' : 
datetime, 'a.b.c' : array<int>>
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "min_load_replica_num" = "-1",
+    "variant_max_subcolumns_count" = "0"
+    );
+    """
+    sql """insert into test_variant_type values(1, '{"dcm" : 1.1, "db" : 2.2, 
"dt" : "2021-01-01 00:00:00", "a.b.c" : [1, 2, 3]}')"""
+    qt_sql "select variant_type(var) from test_variant_type"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to