This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new f7c19d49806 [feature](function) implement variant_type to get inner types in variant columns (#49909) f7c19d49806 is described below commit f7c19d49806b63aa2b27168e054afa2d8523973d Author: lihangyu <lihan...@selectdb.com> AuthorDate: Thu Apr 10 10:49:24 2025 +0800 [feature](function) implement variant_type to get inner types in variant columns (#49909) --- be/src/vec/columns/column_object.cpp | 290 +++------------------ be/src/vec/columns/column_object.h | 1 - be/src/vec/common/schema_util.cpp | 226 ++++++++++++++++ be/src/vec/common/schema_util.h | 2 + be/src/vec/data_types/data_type_jsonb.h | 4 +- be/src/vec/functions/function_variant_type.cpp | 105 ++++++++ be/src/vec/functions/simple_function_factory.h | 2 + be/src/vec/json/parse2column.cpp | 2 +- .../doris/catalog/BuiltinScalarFunctions.java | 4 +- .../functions/scalar/GetVariantType.java | 69 +++++ .../expressions/visitor/ScalarFunctionVisitor.java | 5 + regression-test/data/variant_p0/predefine/load.out | Bin 7454 -> 7526 bytes .../suites/variant_p0/predefine/load.groovy | 22 +- 13 files changed, 466 insertions(+), 266 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 18ff9033cf9..1b607a7e87e 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -137,265 +137,8 @@ size_t get_number_of_dimensions(const IDataType& type) { } return num_dimensions; } - -/// Calculates number of dimensions in array field. -/// Returns 0 for scalar fields. 
-class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> { -public: - size_t operator()(const Array& x) const { - const size_t size = x.size(); - size_t dimensions = 0; - for (size_t i = 0; i < size; ++i) { - size_t element_dimensions = apply_visitor(*this, x[i]); - dimensions = std::max(dimensions, element_dimensions); - } - return 1 + dimensions; - } - size_t operator()(const VariantField& x) { return apply_visitor(*this, x.get_field()); } - template <typename T> - size_t operator()(const T&) const { - return 0; - } -}; - -// Visitor that allows to get type of scalar field -// but exclude fields contain complex field.This is a faster version -// for FieldVisitorToScalarType which does not support complex field. -class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> { -public: - size_t operator()(const Array& x) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported"); - } - size_t operator()(const UInt64& x) { - if (x <= std::numeric_limits<Int8>::max()) { - type = TypeIndex::Int8; - } else if (x <= std::numeric_limits<Int16>::max()) { - type = TypeIndex::Int16; - } else if (x <= std::numeric_limits<Int32>::max()) { - type = TypeIndex::Int32; - } else { - type = TypeIndex::Int64; - } - return 1; - } - size_t operator()(const Int64& x) { - if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) { - type = TypeIndex::Int8; - } else if (x <= std::numeric_limits<Int16>::max() && - x >= std::numeric_limits<Int16>::min()) { - type = TypeIndex::Int16; - } else if (x <= std::numeric_limits<Int32>::max() && - x >= std::numeric_limits<Int32>::min()) { - type = TypeIndex::Int32; - } else { - type = TypeIndex::Int64; - } - return 1; - } - size_t operator()(const JsonbField& x) { - type = TypeIndex::JSONB; - return 1; - } - size_t operator()(const Null&) { - have_nulls = true; - return 1; - } - size_t operator()(const VariantMap&) { - type = TypeIndex::VARIANT; - return 1; - } - size_t 
operator()(const VariantField& x) { - typed_field_info = - FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; - return 1; - } - template <typename T> - size_t operator()(const T&) { - type = TypeId<NearestFieldType<T>>::value; - return 1; - } - void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) const { - if (typed_field_info.has_value()) { - *data_type = typed_field_info->scalar_type_id; - *precision = typed_field_info->precision; - *scale = typed_field_info->scale; - return; - } - *data_type = type; - } - bool contain_nulls() const { return have_nulls; } - - bool need_convert_field() const { return false; } - -private: - // initialized when operator()(const VariantField& x) - std::optional<FieldInfo> typed_field_info; - TypeIndex type = TypeIndex::Nothing; - bool have_nulls = false; -}; - -/// Visitor that allows to get type of scalar field -/// or least common type of scalars in array. -/// More optimized version of FieldToDataType. -class FieldVisitorToScalarType : public StaticVisitor<size_t> { -public: - using FieldType = Field::Types::Which; - size_t operator()(const Array& x) { - size_t size = x.size(); - for (size_t i = 0; i < size; ++i) { - apply_visitor(*this, x[i]); - } - return 0; - } - // TODO doris not support unsigned integers for now - // treat as signed integers - size_t operator()(const UInt64& x) { - field_types.insert(FieldType::UInt64); - if (x <= std::numeric_limits<Int8>::max()) { - type_indexes.insert(TypeIndex::Int8); - } else if (x <= std::numeric_limits<Int16>::max()) { - type_indexes.insert(TypeIndex::Int16); - } else if (x <= std::numeric_limits<Int32>::max()) { - type_indexes.insert(TypeIndex::Int32); - } else { - type_indexes.insert(TypeIndex::Int64); - } - return 0; - } - size_t operator()(const Int64& x) { - field_types.insert(FieldType::Int64); - if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) { - type_indexes.insert(TypeIndex::Int8); - } else if (x 
<= std::numeric_limits<Int16>::max() && - x >= std::numeric_limits<Int16>::min()) { - type_indexes.insert(TypeIndex::Int16); - } else if (x <= std::numeric_limits<Int32>::max() && - x >= std::numeric_limits<Int32>::min()) { - type_indexes.insert(TypeIndex::Int32); - } else { - type_indexes.insert(TypeIndex::Int64); - } - return 0; - } - size_t operator()(const JsonbField& x) { - field_types.insert(FieldType::JSONB); - type_indexes.insert(TypeIndex::JSONB); - return 0; - } - size_t operator()(const VariantMap&) { - field_types.insert(FieldType::VariantMap); - type_indexes.insert(TypeIndex::VARIANT); - return 0; - } - size_t operator()(const VariantField& x) { - if (x.get_type_id() == TypeIndex::Array) { - apply_visitor(*this, x.get_field()); - } else { - typed_field_info = - FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; - } - return 0; - } - size_t operator()(const Null&) { - have_nulls = true; - return 0; - } - template <typename T> - size_t operator()(const T&) { - Field::EnumToType<Field::Types::Array>::Type a; - field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value); - type_indexes.insert(TypeId<NearestFieldType<T>>::value); - return 0; - } - void get_scalar_type(TypeIndex* type, int* precision, int* scale) const { - if (typed_field_info.has_value()) { - // fast path - *type = typed_field_info->scalar_type_id; - *precision = typed_field_info->precision; - *scale = typed_field_info->scale; - return; - } - DataTypePtr data_type; - get_least_supertype_jsonb(type_indexes, &data_type); - *type = data_type->get_type_id(); - } - bool contain_nulls() const { return have_nulls; } - bool need_convert_field() const { return field_types.size() > 1; } - -private: - // initialized when operator()(const VariantField& x) - std::optional<FieldInfo> typed_field_info; - phmap::flat_hash_set<TypeIndex> type_indexes; - phmap::flat_hash_set<FieldType> field_types; - bool have_nulls = false; -}; - -/// Visitor that keeps 
@num_dimensions_to_keep dimensions in arrays -/// and replaces all scalars or nested arrays to @replacement at that level. -class FieldVisitorReplaceScalars : public StaticVisitor<Field> { -public: - FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_) - : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {} - - Field operator()(const Array& x) const { - if (num_dimensions_to_keep == 0) { - return replacement; - } - - const size_t size = x.size(); - Array res(size); - for (size_t i = 0; i < size; ++i) { - res[i] = apply_visitor( - FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]); - } - return res; - } - - template <typename T> - Field operator()(const T&) const { - return replacement; - } - -private: - const Field& replacement; - size_t num_dimensions_to_keep; -}; - } // namespace -template <typename Visitor> -void get_field_info_impl(const Field& field, FieldInfo* info) { - Visitor to_scalar_type_visitor; - apply_visitor(to_scalar_type_visitor, field); - TypeIndex type_id; - int precision = 0; - int scale = 0; - to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale); - // array item's dimension may missmatch, eg. 
[1, 2, [1, 2, 3]] - *info = { - type_id, - to_scalar_type_visitor.contain_nulls(), - to_scalar_type_visitor.need_convert_field(), - apply_visitor(FieldVisitorToNumberOfDimensions(), field), - scale, - precision, - }; -} - -bool is_complex_field(const Field& field) { - return field.is_complex_field() || - (field.is_variant_field() && - field.get<const VariantField&>().get_field().is_complex_field()); -} - -void get_field_info(const Field& field, FieldInfo* info) { - if (is_complex_field(field)) { - get_field_info_impl<FieldVisitorToScalarType>(field, info); - } else { - get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info); - } -} - #ifdef NDEBUG #define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */ #else @@ -472,7 +215,7 @@ Field get_field_from_variant_field(const Field& field) { void ColumnObject::Subcolumn::insert(Field field) { FieldInfo info; - get_field_info(field, &info); + schema_util::get_field_info(field, &info); field = get_field_from_variant_field(field); insert(std::move(field), std::move(info)); } @@ -2481,6 +2224,37 @@ bool ColumnObject::try_insert_many_defaults_from_nested(const Subcolumns::NodePt return true; } +/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays +/// and replaces all scalars or nested arrays to @replacement at that level. 
+class FieldVisitorReplaceScalars : public StaticVisitor<Field> { +public: + FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_) + : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {} + + Field operator()(const Array& x) const { + if (num_dimensions_to_keep == 0) { + return replacement; + } + + const size_t size = x.size(); + Array res(size); + for (size_t i = 0; i < size; ++i) { + res[i] = apply_visitor( + FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]); + } + return res; + } + + template <typename T> + Field operator()(const T&) const { + return replacement; + } + +private: + const Field& replacement; + size_t num_dimensions_to_keep; +}; + bool ColumnObject::try_insert_default_from_nested(const Subcolumns::NodePtr& entry) const { const auto* leaf = get_leaf_of_the_same_nested(entry); if (!leaf) { diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index ad8a693e736..8a76151bba6 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -84,7 +84,6 @@ struct FieldInfo { int precision = 0; }; -void get_field_info(const Field& field, FieldInfo* info); /** A column that represents object with dynamic set of subcolumns. * Subcolumns are identified by paths in document and are stored in * a trie-like structure. 
ColumnObject is not suitable for writing into tables diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 17430380310..12049e20358 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -64,6 +64,7 @@ #include "vec/columns/column_object.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/common/field_visitors.h" #include "vec/common/typeid_cast.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" @@ -904,5 +905,230 @@ void calculate_variant_stats(const IColumn& encoded_sparse_column, } } +/// Calculates number of dimensions in array field. +/// Returns 0 for scalar fields. +class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> { +public: + size_t operator()(const Array& x) const { + const size_t size = x.size(); + size_t dimensions = 0; + for (size_t i = 0; i < size; ++i) { + size_t element_dimensions = apply_visitor(*this, x[i]); + dimensions = std::max(dimensions, element_dimensions); + } + return 1 + dimensions; + } + size_t operator()(const VariantField& x) { return apply_visitor(*this, x.get_field()); } + template <typename T> + size_t operator()(const T&) const { + return 0; + } +}; + +// Visitor that allows to get type of scalar field +// but exclude fields contain complex field.This is a faster version +// for FieldVisitorToScalarType which does not support complex field. 
+class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> { +public: + size_t operator()(const Array& x) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported"); + } + size_t operator()(const UInt64& x) { + if (x <= std::numeric_limits<Int8>::max()) { + type = TypeIndex::Int8; + } else if (x <= std::numeric_limits<Int16>::max()) { + type = TypeIndex::Int16; + } else if (x <= std::numeric_limits<Int32>::max()) { + type = TypeIndex::Int32; + } else { + type = TypeIndex::Int64; + } + return 1; + } + size_t operator()(const Int64& x) { + if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) { + type = TypeIndex::Int8; + } else if (x <= std::numeric_limits<Int16>::max() && + x >= std::numeric_limits<Int16>::min()) { + type = TypeIndex::Int16; + } else if (x <= std::numeric_limits<Int32>::max() && + x >= std::numeric_limits<Int32>::min()) { + type = TypeIndex::Int32; + } else { + type = TypeIndex::Int64; + } + return 1; + } + size_t operator()(const JsonbField& x) { + type = TypeIndex::JSONB; + return 1; + } + size_t operator()(const Null&) { + have_nulls = true; + return 1; + } + size_t operator()(const VariantMap&) { + type = TypeIndex::VARIANT; + return 1; + } + size_t operator()(const VariantField& x) { + typed_field_info = + FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; + return 1; + } + template <typename T> + size_t operator()(const T&) { + type = TypeId<NearestFieldType<T>>::value; + return 1; + } + void get_scalar_type(TypeIndex* data_type, int* precision, int* scale) const { + if (typed_field_info.has_value()) { + *data_type = typed_field_info->scalar_type_id; + *precision = typed_field_info->precision; + *scale = typed_field_info->scale; + return; + } + *data_type = type; + } + bool contain_nulls() const { return have_nulls; } + + bool need_convert_field() const { return false; } + +private: + // initialized when operator()(const VariantField& x) + 
std::optional<FieldInfo> typed_field_info; + TypeIndex type = TypeIndex::Nothing; + bool have_nulls = false; +}; + +/// Visitor that allows to get type of scalar field +/// or least common type of scalars in array. +/// More optimized version of FieldToDataType. +class FieldVisitorToScalarType : public StaticVisitor<size_t> { +public: + using FieldType = Field::Types::Which; + size_t operator()(const Array& x) { + size_t size = x.size(); + for (size_t i = 0; i < size; ++i) { + apply_visitor(*this, x[i]); + } + return 0; + } + // TODO doris not support unsigned integers for now + // treat as signed integers + size_t operator()(const UInt64& x) { + field_types.insert(FieldType::UInt64); + if (x <= std::numeric_limits<Int8>::max()) { + type_indexes.insert(TypeIndex::Int8); + } else if (x <= std::numeric_limits<Int16>::max()) { + type_indexes.insert(TypeIndex::Int16); + } else if (x <= std::numeric_limits<Int32>::max()) { + type_indexes.insert(TypeIndex::Int32); + } else { + type_indexes.insert(TypeIndex::Int64); + } + return 0; + } + size_t operator()(const Int64& x) { + field_types.insert(FieldType::Int64); + if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) { + type_indexes.insert(TypeIndex::Int8); + } else if (x <= std::numeric_limits<Int16>::max() && + x >= std::numeric_limits<Int16>::min()) { + type_indexes.insert(TypeIndex::Int16); + } else if (x <= std::numeric_limits<Int32>::max() && + x >= std::numeric_limits<Int32>::min()) { + type_indexes.insert(TypeIndex::Int32); + } else { + type_indexes.insert(TypeIndex::Int64); + } + return 0; + } + size_t operator()(const JsonbField& x) { + field_types.insert(FieldType::JSONB); + type_indexes.insert(TypeIndex::JSONB); + return 0; + } + size_t operator()(const VariantMap&) { + field_types.insert(FieldType::VariantMap); + type_indexes.insert(TypeIndex::VARIANT); + return 0; + } + size_t operator()(const VariantField& x) { + if (x.get_type_id() == TypeIndex::Array) { + 
apply_visitor(*this, x.get_field()); + } else { + typed_field_info = + FieldInfo {x.get_type_id(), true, false, 0, x.get_scale(), x.get_precision()}; + } + return 0; + } + size_t operator()(const Null&) { + have_nulls = true; + return 0; + } + template <typename T> + size_t operator()(const T&) { + Field::EnumToType<Field::Types::Array>::Type a; + field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value); + type_indexes.insert(TypeId<NearestFieldType<T>>::value); + return 0; + } + void get_scalar_type(TypeIndex* type, int* precision, int* scale) const { + if (typed_field_info.has_value()) { + // fast path + *type = typed_field_info->scalar_type_id; + *precision = typed_field_info->precision; + *scale = typed_field_info->scale; + return; + } + DataTypePtr data_type; + get_least_supertype_jsonb(type_indexes, &data_type); + *type = data_type->get_type_id(); + } + bool contain_nulls() const { return have_nulls; } + bool need_convert_field() const { return field_types.size() > 1; } + +private: + // initialized when operator()(const VariantField& x) + std::optional<FieldInfo> typed_field_info; + phmap::flat_hash_set<TypeIndex> type_indexes; + phmap::flat_hash_set<FieldType> field_types; + bool have_nulls = false; +}; + +template <typename Visitor> +void get_field_info_impl(const Field& field, FieldInfo* info) { + Visitor to_scalar_type_visitor; + apply_visitor(to_scalar_type_visitor, field); + TypeIndex type_id; + int precision = 0; + int scale = 0; + to_scalar_type_visitor.get_scalar_type(&type_id, &precision, &scale); + // array item's dimension may missmatch, eg. 
[1, 2, [1, 2, 3]] + *info = { + type_id, + to_scalar_type_visitor.contain_nulls(), + to_scalar_type_visitor.need_convert_field(), + apply_visitor(FieldVisitorToNumberOfDimensions(), field), + scale, + precision, + }; +} + +bool is_complex_field(const Field& field) { + return field.is_complex_field() || + (field.is_variant_field() && + field.get<const VariantField&>().get_field().is_complex_field()); +} + +void get_field_info(const Field& field, FieldInfo* info) { + if (is_complex_field(field)) { + get_field_info_impl<FieldVisitorToScalarType>(field, info); + } else { + get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info); + } +} + #include "common/compile_check_end.h" } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index 8281cdec7b6..eba6869964e 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -152,4 +152,6 @@ void calculate_variant_stats(const IColumn& encoded_sparse_column, segment_v2::VariantStatisticsPB* stats, size_t row_pos, size_t num_rows); +void get_field_info(const Field& field, FieldInfo* info); + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 25d90d37482..6fd188574dd 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -87,9 +87,7 @@ public: // Return JsonbField. 
Field get_type_field(const IColumn& column, size_t row) const override { const auto& column_data = static_cast<const ColumnString&>(column); - Field field = - JsonbField(column_data.get_data_at(row).data, column_data.get_data_at(row).size); - return VariantField(std::move(field), TypeIndex::JSONB); + return JsonbField(column_data.get_data_at(row).data, column_data.get_data_at(row).size); } bool equals(const IDataType& rhs) const override; diff --git a/be/src/vec/functions/function_variant_type.cpp b/be/src/vec/functions/function_variant_type.cpp new file mode 100644 index 00000000000..8e541a6958f --- /dev/null +++ b/be/src/vec/functions/function_variant_type.cpp @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#include <glog/logging.h> + +#include "vec/columns/column_object.h" +#include "vec/common/schema_util.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { + +// get data type of variant column +class FunctionVariantType : public IFunction { +public: + static constexpr auto name = "variant_type"; + static FunctionPtr create() { return std::make_shared<FunctionVariantType>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + std::map<std::string, std::string> get_type_info(const ColumnObject& column, size_t row) const { + std::map<std::string, std::string> result; + Field field = column[row]; + const auto& variant_map = field.get<const VariantMap&>(); + for (const auto& [key, value] : variant_map) { + if (key.empty() && value.get_type() == Field::Types::JSONB && + value.get<const JsonbField&>().get_size() == 0) { + // ignore empty jsonb root, it's tricky here + continue; + } + FieldInfo info; + schema_util::get_field_info(value, &info); + result[key] = getTypeName(info.scalar_type_id); + } + return result; + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override { + const auto& arg_column = + assert_cast<const ColumnObject&>(*block.get_by_position(arguments[0]).column); + auto result_column = ColumnString::create(); + auto arg_real_type = arg_column.get_root_type(); + + for (size_t i = 0; i < input_rows_count; ++i) { + const Field& variant_map = arg_column[i]; + auto type_info = get_type_info(arg_column, i); + + // Use ColumnString as buffer for JSON serialization + VectorBufferWriter writer(*result_column.get()); + + // Write JSON object + writeChar('{', 
writer); + + bool first = true; + for (const auto& [key, value] : type_info) { + if (!first) { + writeChar(',', writer); + } + first = false; + + // Write key + writeJSONString(key, writer); + writeCString(":", writer); + + // Write value + writeJSONString(value, writer); + } + + writeChar('}', writer); + writer.commit(); + } + + block.replace_by_position(result, std::move(result_column)); + return Status::OK(); + } +}; + +void register_function_variant_type(SimpleFunctionFactory& factory) { + factory.register_function<FunctionVariantType>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 46eca0cb419..2e4e54d49c2 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -112,6 +112,7 @@ void register_function_split_by_regexp(SimpleFunctionFactory& factory); void register_function_assert_true(SimpleFunctionFactory& factory); void register_function_compress(SimpleFunctionFactory& factory); void register_function_bit_test(SimpleFunctionFactory& factory); +void register_function_variant_type(SimpleFunctionFactory& factory); class SimpleFunctionFactory { using Creator = std::function<FunctionBuilderPtr()>; @@ -303,6 +304,7 @@ public: register_function_assert_true(instance); register_function_bit_test(instance); register_function_compress(instance); + register_function_variant_type(instance); }); return instance; } diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp index 4ef416dc865..3dc281a6fda 100644 --- a/be/src/vec/json/parse2column.cpp +++ b/be/src/vec/json/parse2column.cpp @@ -157,7 +157,7 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length, size_t old_num_rows = column_object.rows(); for (size_t i = 0; i < paths.size(); ++i) { FieldInfo field_info; - get_field_info(values[i], &field_info); + schema_util::get_field_info(values[i], &field_info); if 
(WhichDataType(field_info.scalar_type_id).is_nothing()) { continue; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index e482b419ac5..46b79d6f514 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -199,6 +199,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble; import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt; import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString; +import org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType; import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest; import org.apache.doris.nereids.trees.expressions.functions.scalar.Grouping; import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingId; @@ -982,7 +983,8 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(SessionUser.class, "session_user"), scalar(LastQueryId.class, "last_query_id"), scalar(Compress.class, "compress"), - scalar(Uncompress.class, "uncompress")); + scalar(Uncompress.class, "uncompress"), + scalar(GetVariantType.class, "variant_type")); public static final BuiltinScalarFunctions INSTANCE = new BuiltinScalarFunctions(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java new file mode 100644 index 00000000000..b3e4c4e6f41 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/GetVariantType.java @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation 
(ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VariantType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'variant_type'. This class is generated by GenerateFunction. + */ +public class GetVariantType extends ScalarFunction + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + + public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( + FunctionSignature.ret(StringType.INSTANCE).args(new VariantType(0)) + ); + + /** + * constructor with 1 argument. 
+ */ + public GetVariantType(Expression arg0) { + super("variant_type", arg0); + } + + /** + * withChildren. + */ + @Override + public GetVariantType withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 1); + return new GetVariantType(children.get(0)); + } + + @Override + public List<FunctionSignature> getSignatures() { + return SIGNATURES; + } + + @Override + public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitGetVariantType(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 4346fb71591..b4b16a86c29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -204,6 +204,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonBigInt import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonDouble; import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonInt; import org.apache.doris.nereids.trees.expressions.functions.scalar.GetJsonString; +import org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType; import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest; import org.apache.doris.nereids.trees.expressions.functions.scalar.Hex; import org.apache.doris.nereids.trees.expressions.functions.scalar.HllCardinality; @@ -2348,4 +2349,8 @@ public interface ScalarFunctionVisitor<R, C> { default R visitUncompress(Uncompress uncompress, C context) { return visitScalarFunction(uncompress, context); } + + default R visitGetVariantType(GetVariantType getVariantType, C context) { + return visitScalarFunction(getVariantType, context); + } } diff --git 
a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out index e2347eb8508..edc57afcd76 100644 Binary files a/regression-test/data/variant_p0/predefine/load.out and b/regression-test/data/variant_p0/predefine/load.out differ diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy index cf0ad73491f..c83c93b2116 100644 --- a/regression-test/suites/variant_p0/predefine/load.groovy +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -318,5 +318,23 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql """insert into test_array_with_nulls values(5, '{"array_decimal" : [1.1, 2.2, 3.3, 4.4]}')""" sql """insert into test_array_with_nulls values(6, '{"array_decimal" : []}')""" sql """insert into test_array_with_nulls values(7, '{"array_decimal" : [null, null]}')""" - qt_sql_arr_null_2 "select * from test_array_with_nulls order by k" -} \ No newline at end of file + qt_sql_arr_null_2 "select * from test_array_with_nulls order by k limit 5" + + // test variant_type + sql "DROP TABLE IF EXISTS test_variant_type" + sql """ + CREATE TABLE `test_variant_type` ( + `k` bigint NULL, + `var` variant<match_name 'dcm' : decimal, 'db' : double, 'dt' : datetime, 'a.b.c' : array<int>> + ) ENGINE=OLAP + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "variant_max_subcolumns_count" = "0" + ); + """ + sql """insert into test_variant_type values(1, '{"dcm" : 1.1, "db" : 2.2, "dt" : "2021-01-01 00:00:00", "a.b.c" : [1, 2, 3]}')""" + qt_sql "select variant_type(var) from test_variant_type" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org