This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch struct-type in repository https://gitbox.apache.org/repos/asf/doris.git
commit bebab27bc9c1a1a93193359fd929309c81c2a69b Author: carlvinhust2012 <huchengha...@126.com> AuthorDate: Thu Dec 8 09:26:12 2022 +0800 [feature](struct-type) add the class ColumnStruct and class DataTypeStruct implement (#14545) Co-authored-by: hucheng01 <huchen...@baidu.com> --- be/src/vec/CMakeLists.txt | 2 + be/src/vec/columns/column_struct.cpp | 618 +++++++++++++++++++++++++++++ be/src/vec/columns/column_struct.h | 232 +++++++++++ be/src/vec/core/types.h | 3 + be/src/vec/data_types/data_type_struct.cpp | 361 +++++++++++++++++ be/src/vec/data_types/data_type_struct.h | 115 ++++++ 6 files changed, 1331 insertions(+) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 4426ba63a7..27d163e8ca 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -49,6 +49,7 @@ set(VEC_FILES aggregate_functions/aggregate_function_histogram.cpp columns/column.cpp columns/column_array.cpp + columns/column_struct.cpp columns/column_const.cpp columns/column_decimal.cpp columns/column_nullable.cpp @@ -72,6 +73,7 @@ set(VEC_FILES core/materialize_block.cpp data_types/data_type.cpp data_types/data_type_array.cpp + data_types/data_type_struct.cpp data_types/data_type_bitmap.cpp data_types/data_type_factory.cpp data_types/data_type_fixed_length_object.cpp diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp new file mode 100644 index 0000000000..05d5df0fd4 --- /dev/null +++ b/be/src/vec/columns/column_struct.cpp @@ -0,0 +1,618 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnTuple.cpp +// and modified by Doris + +#include "vec/columns/column_struct.h" + +namespace doris::vectorized { + +namespace ErrorCodes { +extern const int ILLEGAL_COLUMN; +extern const int NOT_IMPLEMENTED; +extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes + +std::string ColumnStruct::get_name() const { + std::stringstream res; + res << "Struct("; + bool is_first = true; + for (const auto& column : columns) { + if (!is_first) { + res << ", "; + } + is_first = false; + res << column->get_name(); + } + res << ")"; + return res.str(); +} + +ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) { + columns.reserve(mutable_columns.size()); + for (auto& column : mutable_columns) { + if (is_column_const(*column)) { + throw Exception {"ColumnStruct cannot have ColumnConst as its element", + ErrorCodes::ILLEGAL_COLUMN}; + } + columns.push_back(std::move(column)); + } +} + +ColumnStruct::ColumnStruct(Columns&& columns) { + columns.reserve(columns.size()); + for (auto& column : columns) { + if (is_column_const(*column)) { + throw Exception {"ColumnStruct cannot have ColumnConst as its element", + ErrorCodes::ILLEGAL_COLUMN}; + } + columns.push_back(std::move(column)); + } +} + +ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) { + columns.reserve(tuple_columns.size()); + for (auto& column : tuple_columns) { + if (is_column_const(*column)) { + throw Exception 
{"ColumnStruct cannot have ColumnConst as its element", + ErrorCodes::ILLEGAL_COLUMN}; + } + columns.push_back(std::move(column)); + } +} + +ColumnStruct::Ptr ColumnStruct::create(Columns& columns) { + for (const auto& column : columns) { + if (is_column_const(*column)) + throw Exception {"ColumnStruct cannot have ColumnConst as its element", + ErrorCodes::ILLEGAL_COLUMN}; + } + auto column_struct = ColumnStruct::create(columns); + return column_struct; +} + +ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) { + for (const auto& column : tuple_columns) { + if (is_column_const(*column)) { + throw Exception {"ColumnStruct cannot have ColumnConst as its element", + ErrorCodes::ILLEGAL_COLUMN}; + } + } + auto column_struct = ColumnStruct::create(tuple_columns); + return column_struct; +} + +MutableColumnPtr ColumnStruct::clone_empty() const { + const size_t tuple_size = columns.size(); + MutableColumns new_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) { + new_columns[i] = columns[i]->clone_empty(); + } + return ColumnStruct::create(std::move(new_columns)); +} + +MutableColumnPtr ColumnStruct::clone_resized(size_t new_size) const { + const size_t tuple_size = columns.size(); + MutableColumns new_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) { + new_columns[i] = columns[i]->clone_resized(new_size); + } + return ColumnStruct::create(std::move(new_columns)); +} + +Field ColumnStruct::operator[](size_t n) const { + Field res; + get(n, res); + return res; +} + +void ColumnStruct::get(size_t n, Field& res) const { + const size_t tuple_size = columns.size(); + + res = Tuple(); + Tuple& res_tuple = res.get<Tuple&>(); + res_tuple.reserve(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) { + res_tuple.push_back((*columns[i])[n]); + } +} + +bool ColumnStruct::is_default_at(size_t n) const { + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) { + if (!columns[i]->is_default_at(n)) { + 
return false; + } + } + return true; +} + +StringRef ColumnStruct::get_data_at(size_t) const { + throw Exception("Method get_data_at is not supported for " + get_name(), + ErrorCodes::NOT_IMPLEMENTED); +} + +void ColumnStruct::insert_data(const char*, size_t) { + throw Exception("Method insert_data is not supported for " + get_name(), + ErrorCodes::NOT_IMPLEMENTED); +} + +void ColumnStruct::insert(const Field& x) { + const auto& tuple = x.get<const Tuple&>(); + const size_t tuple_size = columns.size(); + if (tuple.size() != tuple_size) { + throw Exception("Cannot insert value of different size into tuple", + ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); + } + + for (size_t i = 0; i < tuple_size; ++i) { + columns[i]->insert(tuple[i]); + } +} + +void ColumnStruct::insert_from(const IColumn& src_, size_t n) { + const ColumnStruct& src = assert_cast<const ColumnStruct&>(src_); + + const size_t tuple_size = columns.size(); + if (src.columns.size() != tuple_size) { + throw Exception("Cannot insert value of different size into tuple", + ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); + } + + for (size_t i = 0; i < tuple_size; ++i) { + columns[i]->insert_from(*src.columns[i], n); + } +} + +void ColumnStruct::insert_default() { + for (auto& column : columns) { + column->insert_default(); + } +} + +void ColumnStruct::pop_back(size_t n) { + for (auto& column : columns) { + column->pop_back(n); + } +} + +StringRef ColumnStruct::serialize_value_into_arena(size_t n, Arena& arena, + char const*& begin) const { + StringRef res(begin, 0); + for (const auto& column : columns) { + auto value_ref = column->serialize_value_into_arena(n, arena, begin); + res.data = value_ref.data - res.size; + res.size += value_ref.size; + } + + return res; +} + +const char* ColumnStruct::deserialize_and_insert_from_arena(const char* pos) { + for (auto& column : columns) { + pos = column->deserialize_and_insert_from_arena(pos); + } + + return pos; +} + +void 
ColumnStruct::update_hash_with_value(size_t n, SipHash& hash) const { + for (const auto& column : columns) { + column->update_hash_with_value(n, hash); + } +} + +// void ColumnStruct::update_weak_hash32(WeakHash32 & hash) const { +// auto s = size(); +// if (hash.get_data().size() != s) { +// throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) + +// ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR); +// } + +// for (const auto & column : columns) { +// column->update_weak_hash32(hash); +// } +// } + +// void ColumnStruct::update_hash_fast(SipHash & hash) const { +// for (const auto & column : columns) { +// column->update_hash_fast(hash); +// } +// } + +// const char * ColumnStruct::skip_serialized_in_arena(const char * pos) const { +// for (const auto & column : columns) { +// pos = column->skip_serialized_in_arena(pos); +// } +// return pos; +// } + +// void ColumnStruct::expand(const Filter & mask, bool inverted) +// { +// for (auto & column : columns) { +// column->expand(mask, inverted); +// } +// } + +// ColumnPtr ColumnStruct::index(const IColumn & indexes, size_t limit) const +// { +// const size_t tuple_size = columns.size(); +// Columns new_columns(tuple_size); + +// for (size_t i = 0; i < tuple_size; ++i) { +// new_columns[i] = columns[i]->index(indexes, limit); +// } + +// return ColumnStruct::create(new_columns); +// } + +void ColumnStruct::insert_range_from(const IColumn& src, size_t start, size_t length) { + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) { + columns[i]->insert_range_from(*assert_cast<const ColumnStruct&>(src).columns[i], start, + length); + } +} + +ColumnPtr ColumnStruct::filter(const Filter& filt, ssize_t result_size_hint) const { + const size_t tuple_size = columns.size(); + Columns new_columns(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) { + new_columns[i] = columns[i]->filter(filt, 
result_size_hint); + } + return ColumnStruct::create(new_columns); +} + +ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const { + const size_t tuple_size = columns.size(); + Columns new_columns(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) { + new_columns[i] = columns[i]->permute(perm, limit); + } + + return ColumnStruct::create(new_columns); +} + +ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const { + const size_t tuple_size = columns.size(); + Columns new_columns(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) { + new_columns[i] = columns[i]->replicate(offsets); + } + + return ColumnStruct::create(new_columns); +} + +MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector& selector) const { + const size_t tuple_size = columns.size(); + std::vector<MutableColumns> scattered_tuple_elements(tuple_size); + + for (size_t tuple_element_idx = 0; tuple_element_idx < tuple_size; ++tuple_element_idx) { + scattered_tuple_elements[tuple_element_idx] = + columns[tuple_element_idx]->scatter(num_columns, selector); + } + + MutableColumns res(num_columns); + + for (size_t scattered_idx = 0; scattered_idx < num_columns; ++scattered_idx) { + MutableColumns new_columns(tuple_size); + for (size_t tuple_element_idx = 0; tuple_element_idx < tuple_size; ++tuple_element_idx) { + new_columns[tuple_element_idx] = + std::move(scattered_tuple_elements[tuple_element_idx][scattered_idx]); + } + res[scattered_idx] = ColumnStruct::create(std::move(new_columns)); + } + + return res; +} + +// int ColumnStruct::compare_at_impl(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint, +// const Collator* collator) const { +// const size_t tuple_size = columns.size(); +// for (size_t i = 0; i < tuple_size; ++i) { +// int res = 0; +// if (collator && columns[i]->is_collation_supported()) { +// res = columns[i]->compare_at_with_collation( +// n, m, *assert_cast<const ColumnStruct&>(rhs).columns[i], nan_direction_hint, 
+// *collator); +// } else { +// res = columns[i]->compare_at(n, m, *assert_cast<const ColumnStruct&>(rhs).columns[i], +// nan_direction_hint); +// } + +// if (res) { +// return res; +// } +// } +// return 0; +// } + +// int ColumnStruct::compare_at(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const { +// return compare_at_impl(n, m, rhs, nan_direction_hint); +// } + +// void ColumnStruct::compare_column(const IColumn& rhs, size_t rhs_row_num, +// PaddedPODArray<UInt64>* row_indexes, +// PaddedPODArray<Int8>& compare_results, int direction, +// int nan_direction_hint) const { +// return do_compare_column<ColumnStruct>(assert_cast<const ColumnStruct&>(rhs), rhs_row_num, +// row_indexes, compare_results, direction, +// nan_direction_hint); +// } + +// int ColumnStruct::compare_at_with_collation(size_t n, size_t m, const IColumn& rhs, +// int nan_direction_hint, +// const Collator& collator) const { +// return compare_at_impl(n, m, rhs, nan_direction_hint, &collator); +// } + +// bool ColumnStruct::has_equal_values() const { +// return has_equal_values_impl<ColumnStruct>(); +// } + +// template <bool positive> +// struct ColumnStruct::Less { +// TupleColumns columns; +// int nan_direction_hint; +// const Collator* collator; + +// Less(const TupleColumns& columns_, int nan_direction_hint_, const Collator* collator_ = nullptr) +// : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_) {} + +// bool operator()(size_t a, size_t b) const { +// for (const auto& column : columns) { +// int res; +// if (collator && column->isCollationSupported()) { +// res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator); +// } else { +// res = column->compareAt(a, b, *column, nan_direction_hint); +// } +// if (res < 0) { +// return positive; +// } else if (res > 0) { +// return !positive; +// } +// } +// return false; +// } +// }; + +// void ColumnStruct::get_permutation_impl(IColumn::PermutationSortDirection 
direction, +// IColumn::PermutationSortStability stability, size_t limit, +// int nan_direction_hint, Permutation& res, +// const Collator* collator) const { +// size_t rows = size(); +// res.resize(rows); +// for (size_t i = 0; i < rows; ++i) { +// res[i] = i; +// } + +// if (limit >= rows) { +// limit = 0; +// } + +// EqualRange ranges; +// ranges.emplace_back(0, rows); +// update_permutation_impl(direction, stability, limit, nan_direction_hint, res, ranges, collator); +// } + +// void ColumnStruct::update_permutation_impl(IColumn::PermutationSortDirection direction, +// IColumn::PermutationSortStability stability, +// size_t limit, int nan_direction_hint, +// IColumn::Permutation& res, EqualRanges& equal_ranges, +// const Collator* collator) const { +// if (equal_ranges.empty()) { +// return; +// } + +// for (const auto& column : columns) { +// while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().first) { +// equal_ranges.pop_back(); +// } + +// if (collator && column->isCollationSupported()) { +// column->update_permutation_with_collation(*collator, direction, stability, limit, +// nan_direction_hint, res, equal_ranges); +// } else { +// column->update_permutation(direction, stability, limit, nan_direction_hint, res, +// equal_ranges); +// } +// if (equal_ranges.empty()) { +// break; +// } +// } +// } + +// void ColumnStruct::get_permutation(IColumn::PermutationSortDirection direction, +// IColumn::PermutationSortStability stability, size_t limit, +// int nan_direction_hint, Permutation& res) const { +// get_permutation_impl(direction, stability, limit, nan_direction_hint, res, nullptr); +// } + +// void ColumnStruct::update_permutation(IColumn::PermutationSortDirection direction, +// IColumn::PermutationSortStability stability, size_t limit, +// int nan_direction_hint, IColumn::Permutation& res, +// EqualRanges& equal_ranges) const { +// update_permutation_impl(direction, stability, limit, nan_direction_hint, res, equal_ranges); +// } + +// 
void ColumnStruct::get_permutation_with_collation(const Collator& collator, +// IColumn::PermutationSortDirection direction, +// IColumn::PermutationSortStability stability, +// size_t limit, int nan_direction_hint, +// Permutation& res) const { +// get_permutation_impl(direction, stability, limit, nan_direction_hint, res, &collator); +// } + +// void ColumnStruct::update_permutation_with_collation(const Collator& collator, +// IColumn::PermutationSortDirection direction, +// IColumn::PermutationSortStability stability, +// size_t limit, int nan_direction_hint, +// Permutation& res, +// EqualRanges& equal_ranges) const { +// update_permutation_impl(direction, stability, limit, nan_direction_hint, res, equal_ranges, +// &collator); +// } + +// void ColumnStruct::gather(ColumnGathererStream& gatherer) { +// gatherer.gather(*this); +// } + +void ColumnStruct::reserve(size_t n) { + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) { + get_column(i).reserve(n); + } +} + +size_t ColumnStruct::byte_size() const { + size_t res = 0; + for (const auto& column : columns) { + res += column->byte_size(); + } + return res; +} + +// size_t ColumnStruct::byte_size_at(size_t n) const { +// size_t res = 0; +// for (const auto& column : columns) { +// res += column->byte_size_at(n); +// } +// return res; +// } + +// void ColumnStruct::ensure_ownership() { +// const size_t tuple_size = columns.size(); +// for (size_t i = 0; i < tuple_size; ++i) { +// get_column(i).ensure_ownership(); +// } +// } + +size_t ColumnStruct::allocated_bytes() const { + size_t res = 0; + for (const auto& column : columns) { + res += column->allocated_bytes(); + } + return res; +} + +void ColumnStruct::protect() { + for (auto& column : columns) { + column->protect(); + } +} + +void ColumnStruct::get_extremes(Field& min, Field& max) const { + const size_t tuple_size = columns.size(); + + Tuple min_tuple(tuple_size); + Tuple max_tuple(tuple_size); + + for (size_t i = 0; i < 
tuple_size; ++i) { + columns[i]->get_extremes(min_tuple[i], max_tuple[i]); + } + + min = min_tuple; + max = max_tuple; +} + +void ColumnStruct::for_each_subcolumn(ColumnCallback callback) { + for (auto& column : columns) { + callback(column); + } +} + +bool ColumnStruct::structure_equals(const IColumn& rhs) const { + if (const auto* rhs_tuple = typeid_cast<const ColumnStruct*>(&rhs)) { + const size_t tuple_size = columns.size(); + if (tuple_size != rhs_tuple->columns.size()) { + return false; + } + + for (size_t i = 0; i < tuple_size; ++i) { + if (!columns[i]->structure_equals(*rhs_tuple->columns[i])) { + return false; + } + } + return true; + } else { + return false; + } +} + +// void ColumnStruct::for_each_subcolumn_recursively(ColumnCallback callback) { +// for (auto& column : columns) { +// callback(column); +// column->for_each_subcolumn_recursively(callback); +// } +// } + +// bool ColumnStruct::is_collation_supported() const { +// for (const auto& column : columns) { +// if (column->is_collation_supported()) { +// return true; +// } +// } +// return false; +// } + +// ColumnPtr ColumnStruct::compress() const { +// size_t byte_size = 0; +// Columns compressed; +// compressed.reserve(columns.size()); +// for (const auto& column : columns) { +// auto compressed_column = column->compress(); +// byte_size += compressed_column->byteSize(); +// compressed.emplace_back(std::move(compressed_column)); +// } + +// return ColumnCompressed::create(size(), byte_size, +// [compressed = std::move(compressed)]() mutable { +// for (auto& column : compressed) { +// column = column->decompress(); +// } +// return ColumnStruct::create(compressed); +// }); +// } + +// double ColumnStruct::get_ratio_of_default_rows(double sample_ratio) const { +// return get_ratio_of_default_rows_impl<ColumnStruct>(sample_ratio); +// } + +// void ColumnStruct::get_indices_of_nondefault_rows(Offsets& indices, size_t from, +// size_t limit) const { +// return 
get_indices_of_nondefault_rows_impl<ColumnStruct>(indices, from, limit); +// } + +// void ColumnStruct::finalize() { +// for (auto& column : columns) { +// column->finalize(); +// } +// } + +// bool ColumnStruct::is_finalized() const { +// return std::all_of(columns.begin(), columns.end(), +// [](const auto& column) { return column->is_finalized(); }); +// } + +} // namespace doris::vectorized diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h new file mode 100644 index 0000000000..895a2796b7 --- /dev/null +++ b/be/src/vec/columns/column_struct.h @@ -0,0 +1,232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnTuple.h +// and modified by Doris + +/******************************************************************************** +// doris/core/be/src/vec/core/field.h +class Field; +using FieldVector = std::vector<Field>; + +/// Array and Tuple use the same storage type -- FieldVector, but we declare +/// distinct types for them, so that the caller can choose whether it wants to +/// construct a Field of Array or a Tuple type. 
An alternative approach would be +/// to construct both of these types from FieldVector, and have the caller +/// specify the desired Field type explicitly. + +#define DEFINE_FIELD_VECTOR(X) \ + struct X : public FieldVector { \ + using FieldVector::FieldVector; \ + } + +DEFINE_FIELD_VECTOR(Array); +DEFINE_FIELD_VECTOR(Tuple); + +#undef DEFINE_FIELD_VECTOR + +// definitions of some pointer aliases +using WrappedPtr = chameleon_ptr<Derived>; + +using Ptr = immutable_ptr<Derived>; +using ColumnPtr = IColumn::Ptr; +using Columns = std::vector<ColumnPtr>; +using MutablePtr = mutable_ptr<Derived>; +using MutableColumnPtr = IColumn::MutablePtr; +using MutableColumns = std::vector<MutableColumnPtr>; +****************************************************************************/ + +#pragma once + +#include "vec/columns/column.h" +#include "vec/columns/column_impl.h" +#include "vec/columns/column_vector.h" +#include "vec/common/arena.h" +#include "vec/common/assert_cast.h" +#include "vec/common/typeid_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" + +namespace doris::vectorized { + +/** A column that is just a group of several other columns. + * + * For constant Tuples, see ColumnConst. + * Mixing constant and non-constant columns is prohibited in a tuple + * for implementation simplicity. + */ +class ColumnStruct final : public COWHelper<IColumn, ColumnStruct> { +private: + friend class COWHelper<IColumn, ColumnStruct>; + + using TupleColumns = std::vector<WrappedPtr>; + TupleColumns columns; + + template <bool positive> + struct Less; + + ColumnStruct(Columns&& columns); + ColumnStruct(TupleColumns&& tuple_columns); + explicit ColumnStruct(MutableColumns&& mutable_columns); + ColumnStruct(const ColumnStruct&) = default; + +public: + /** Create an immutable column using immutable arguments. These arguments may be shared with other columns. + * Use IColumn::mutate in order to make a mutable column and mutate shared nested columns. 
+ */ + using Base = COWHelper<IColumn, ColumnStruct>; + static Ptr create(Columns& columns); + static Ptr create(MutableColumns& columns); + static Ptr create(TupleColumns& columns); + static Ptr create(Columns&& arg) { return create(arg); } + + template <typename... Args> + static MutablePtr create(Args&&... args) { + return Base::create(std::forward<Args>(args)...); + } + + std::string get_name() const override; + const char* get_family_name() const override { return "Struct"; } + TypeIndex get_data_type() const { return TypeIndex::Struct; } + + MutableColumnPtr clone_empty() const override; + MutableColumnPtr clone_resized(size_t size) const override; + + size_t size() const override { return columns.at(0)->size(); } + + Field operator[](size_t n) const override; + void get(size_t n, Field& res) const override; + + bool is_default_at(size_t n) const override; + StringRef get_data_at(size_t n) const override; + void insert_data(const char* pos, size_t length) override; + void insert(const Field& x) override; + void insert_from(const IColumn& src_, size_t n) override; + void insert_default() override; + void pop_back(size_t n) override; + StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override; + const char* deserialize_and_insert_from_arena(const char* pos) override; + void update_hash_with_value(size_t n, SipHash& hash) const override; + + // const char * skip_serialized_in_arena(const char * pos) const override; + // void update_weak_hash32(WeakHash32 & hash) const override; + // void update_hash_fast(SipHash & hash) const override; + + void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) override { + LOG(FATAL) << "insert_indices_from not implemented"; + } + + void get_permutation(bool reverse, size_t limit, int nan_direction_hint, + Permutation& res) const override { + LOG(FATAL) << "get_permutation not implemented"; + } + void append_data_by_selector(MutableColumnPtr& res, 
const Selector& selector) const override { + return append_data_by_selector_impl<ColumnStruct>(res, selector); + } + void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { + LOG(FATAL) << "replace_column_data not implemented"; + } + void replace_column_data_default(size_t self_row = 0) override { + LOG(FATAL) << "replace_column_data_default not implemented"; + } + + void insert_range_from(const IColumn& src, size_t start, size_t length) override; + ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + ColumnPtr permute(const Permutation& perm, size_t limit) const override; + ColumnPtr replicate(const Offsets& offsets) const override; + MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override; + + // ColumnPtr index(const IColumn & indexes, size_t limit) const override; + // void expand(const Filter & mask, bool inverted) override; + // void gather(ColumnGathererStream & gatherer_stream) override; + // bool has_equal_values() const override; + + // void compare_column(const IColumn& rhs, size_t rhs_row_num, PaddedPODArray<UInt64>* row_indexes, + // PaddedPODArray<Int8>& compare_results, int direction, + // int nan_direction_hint) const override; + // int compare_at_with_collation(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint, + // const Collator& collator) const override; + + int compare_at(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const override; + void get_extremes(Field& min, Field& max) const override; + + // void get_permutation(IColumn::PermutationSortDirection direction, + // IColumn::PermutationSortStability stability, size_t limit, + // int nan_direction_hint, IColumn::Permutation& res) const override; + // void update_permutation(IColumn::PermutationSortDirection direction, + // IColumn::PermutationSortStability stability, size_t limit, + // int nan_direction_hint, IColumn::Permutation& res, + // EqualRanges& equal_ranges) const 
override; + // void get_permutation_with_collation(const Collator& collator, + // IColumn::PermutationSortDirection direction, + // IColumn::PermutationSortStability stability, size_t limit, + // int nan_direction_hint, + // IColumn::Permutation& res) const override; + // void update_permutation_with_collation(const Collator& collator, + // IColumn::PermutationSortDirection direction, + // IColumn::PermutationSortStability stability, + // size_t limit, int nan_direction_hint, + // IColumn::Permutation& res, + // EqualRanges& equal_ranges) const override; + + void reserve(size_t n) override; + size_t byte_size() const override; + + // size_t byte_size_at(size_t n) const override; + // void ensure_ownership() override; + + size_t allocated_bytes() const override; + void protect() override; + void for_each_subcolumn(ColumnCallback callback) override; + bool structure_equals(const IColumn& rhs) const override; + + // void for_each_subcolumn_recursively(ColumnCallback callback) override; + // bool is_collation_supported() const override; + // ColumnPtr compress() const override; + // double get_ratio_of_default_rows(double sample_ratio) const override; + // void get_indices_of_nondefault_rows(Offsets & indices, size_t from, size_t limit) const override; + // void finalize() override; + // bool is_finalized() const override; + + size_t tuple_size() const { return columns.size(); } + + const IColumn& get_column(size_t idx) const { return *columns[idx]; } + IColumn& get_column(size_t idx) { return *columns[idx]; } + + const TupleColumns& get_columns() const { return columns; } + Columns get_columns_copy() const { return {columns.begin(), columns.end()}; } + + const ColumnPtr& get_column_ptr(size_t idx) const { return columns[idx]; } + ColumnPtr& get_column_ptr(size_t idx) { return columns[idx]; } + +private: + int compare_at_impl(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const; + + // void get_permutation_impl(IColumn::PermutationSortDirection 
direction, + // IColumn::PermutationSortStability stability, size_t limit, + // int nan_direction_hint, Permutation& res, + // const Collator* collator) const; + + // void update_permutation_impl(IColumn::PermutationSortDirection direction, + // IColumn::PermutationSortStability stability, size_t limit, + // int nan_direction_hint, IColumn::Permutation& res, + // EqualRanges& equal_ranges, + // const Collator* collator = nullptr) const; +}; + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 7636d714b3..95947ca7eb 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -80,6 +80,7 @@ enum class TypeIndex { FixedLengthObject, JSONB, Decimal128I, + Struct, }; struct Consted { @@ -525,6 +526,8 @@ inline const char* getTypeName(TypeIndex idx) { return "FixedLengthObject"; case TypeIndex::JSONB: return "JSONB"; + case TypeIndex::Struct: + return "Struct"; } __builtin_unreachable(); diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp new file mode 100644 index 0000000000..91aff67a40 --- /dev/null +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -0,0 +1,361 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeTuple.cpp +// and modified by Doris + +#include "vec/data_types/data_type_struct.h" + +namespace doris::vectorized { + +namespace ErrorCodes { +extern const int BAD_ARGUMENTS; +extern const int DUPLICATE_COLUMN; +extern const int EMPTY_DATA_PASSED; +extern const int NOT_FOUND_COLUMN_IN_BLOCK; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; +extern const int ILLEGAL_INDEX; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes + +DataTypeStruct::DataTypeStruct(const DataTypes& elems_) + : elems(elems_), have_explicit_names(false) { + /// Automatically assigned names in form of '1', '2', ... + size_t size = elems.size(); + names.resize(size); + for (size_t i = 0; i < size; ++i) { + names[i] = std::to_string(i + 1); + } +} + +static std::optional<Exception> check_tuple_names(const Strings& names) { + std::unordered_set<String> names_set; + for (const auto& name : names) { + if (name.empty()) { + return Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS); + } + + if (!names_set.insert(name).second) { + return Exception("Names of tuple elements must be unique", + ErrorCodes::DUPLICATE_COLUMN); + } + } + + return {}; +} + +DataTypeStruct::DataTypeStruct(const DataTypes& elems_, const Strings& names_) + : elems(elems_), names(names_), have_explicit_names(true) { + size_t size = elems.size(); + if (names.size() != size) { + throw Exception("Wrong number of names passed to constructor of DataTypeStruct", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + + if (auto exception = check_tuple_names(names)) { + throw std::move(*exception); + } +} + +std::string DataTypeStruct::do_get_name() const { + size_t size = elems.size(); + std::stringstream s; + + s << "Struct("; 
+ for (size_t i = 0; i < size; ++i) { + if (i != 0) { + s << ", "; + } + + // if (have_explicit_names) { + // s << back_quote_if_need(names[i]) << ' '; + // } + + s << elems[i]->get_name(); + } + s << ")"; + + return s.str(); +} + +static inline IColumn& extract_element_column(IColumn& column, size_t idx) { + return assert_cast<ColumnStruct&>(column).get_column(idx); +} + +template <typename F> +static void add_element_safe(const DataTypes& elems, IColumn& column, F&& impl) { + /// We use the assumption that tuples of zero size do not exist. + size_t old_size = column.size(); + + try { + impl(); + + // Check that all columns now have the same size. + size_t new_size = column.size(); + + // for (auto i : collections::range(0, elems.size())) { + for (auto i = 0; i < elems.size(); i++) { + const auto& element_column = extract_element_column(column, i); + if (element_column.size() != new_size) { + // This is not a logical error because it may work with + // user-supplied data. + throw Exception("Cannot read a tuple because not all elements are present", + ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH); + } + } + } catch (...) { + // for (const auto& i : collections::range(0, elems.size())) { + for (auto i = 0; i < elems.size(); i++) { + auto& element_column = extract_element_column(column, i); + + if (element_column.size() > old_size) { + element_column.pop_back(1); + } + } + + throw; + } +} + +MutableColumnPtr DataTypeStruct::create_column() const { + size_t size = elems.size(); + MutableColumns tuple_columns(size); + for (size_t i = 0; i < size; ++i) { + tuple_columns[i] = elems[i]->create_column(); + } + return ColumnStruct::create(std::move(tuple_columns)); +} + +// MutableColumnPtr DataTypeStruct::create_column(const ISerialization& serialization) const { +// /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed +// /// several times to allow to reconstruct the substream path name. 
+// /// Here we don't need substream path name, so we drop first several wrapper serializations. + +// const auto* current_serialization = &serialization; +// while (const auto* serialization_named = +// typeid_cast<const SerializationNamed*>(current_serialization)) +// current_serialization = serialization_named->get_nested().get(); + +// const auto* serialization_tuple = typeid_cast<const SerializationTuple*>(current_serialization); +// if (!serialization_tuple) +// throw Exception(ErrorCodes::LOGICAL_ERROR, +// "Unexpected serialization to create column of type Tuple"); + +// const auto& element_serializations = serialization_tuple->getElementsSerializations(); + +// size_t size = elems.size(); +// assert(element_serializations.size() == size); +// MutableColumns tuple_columns(size); +// for (size_t i = 0; i < size; ++i) { +// tuple_columns[i] = elems[i]->create_column(*element_serializations[i]->get_nested()); +// } + +// return ColumnStruct::create(std::move(tuple_columns)); +// } + +// Field DataTypeStruct::get_default() const { +// return Tuple(collections::map<Tuple>( +// elems, [](const DataTypePtr& elem) { return elem->get_default(); })); +// } + +void DataTypeStruct::insert_default_into(IColumn& column) const { + add_element_safe(elems, column, [&] { + // for (const auto& i : collections::range(0, elems.size())) + for (auto i = 0; i < elems.size(); i++) { + elems[i]->insert_default_into(extract_element_column(column, i)); + } + }); +} + +bool DataTypeStruct::equals(const IDataType& rhs) const { + if (typeid(rhs) != typeid(*this)) { + return false; + } + + const DataTypeStruct& rhs_tuple = static_cast<const DataTypeStruct&>(rhs); + + size_t size = elems.size(); + if (size != rhs_tuple.elems.size()) { + return false; + } + + for (size_t i = 0; i < size; ++i) { + if (!elems[i]->equals(*rhs_tuple.elems[i]) || names[i] != rhs_tuple.names[i]) { + return false; + } + } + + return true; +} + +size_t DataTypeStruct::get_position_by_name(const String& name) const 
{ + size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) { + if (names[i] == name) { + return i; + } + } + throw Exception("Struct doesn't have element with name '" + name + "'", + ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); +} + +std::optional<size_t> DataTypeStruct::try_get_position_by_name(const String& name) const { + size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) { + if (names[i] == name) { + return std::optional<size_t>(i); + } + } + return std::nullopt; +} + +String DataTypeStruct::get_name_by_position(size_t i) const { + if (i == 0 || i > names.size()) { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "Index of tuple element ({}) if out range ([1, {}])", i, + names.size()); + throw Exception(fmt::to_string(error_msg), ErrorCodes::ILLEGAL_INDEX); + } + + return names[i - 1]; +} + +bool DataTypeStruct::text_can_contain_only_valid_utf8() const { + return std::all_of(elems.begin(), elems.end(), + [](auto&& elem) { return elem->text_can_contain_only_valid_utf8(); }); +} + +bool DataTypeStruct::have_maximum_size_of_value() const { + return std::all_of(elems.begin(), elems.end(), + [](auto&& elem) { return elem->have_maximum_size_of_value(); }); +} + +bool DataTypeStruct::is_comparable() const { + return std::all_of(elems.begin(), elems.end(), + [](auto&& elem) { return elem->is_comparable(); }); +} + +size_t DataTypeStruct::get_maximum_size_of_value_in_memory() const { + size_t res = 0; + for (const auto& elem : elems) { + res += elem->get_maximum_size_of_value_in_memory(); + } + return res; +} + +size_t DataTypeStruct::get_size_of_value_in_memory() const { + size_t res = 0; + for (const auto& elem : elems) { + res += elem->get_size_of_value_in_memory(); + } + return res; +} + +// bool DataTypeStruct::has_dynamic_subcolumns() const { +// return std::any_of(elems.begin(), elems.end(), +// [](auto&& elem) { return elem->has_dynamic_subcolumns(); }); +// } + +// SerializationPtr DataTypeStruct::do_get_default_serialization() const 
{ +// SerializationTuple::ElementSerializations serializations(elems.size()); + +// for (size_t i = 0; i < elems.size(); ++i) { +// String elem_name = have_explicit_names ? names[i] : toString(i + 1); +// auto serialization = elems[i]->get_default_serialization(); +// serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name); +// } + +// return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names); +// } + +// SerializationPtr DataTypeStruct::get_serialization(const SerializationInfo& info) const { +// SerializationTuple::ElementSerializations serializations(elems.size()); +// const auto& info_tuple = assert_cast<const SerializationInfoTuple&>(info); + +// for (size_t i = 0; i < elems.size(); ++i) { +// String elem_name = have_explicit_names ? names[i] : toString(i + 1); +// auto serialization = elems[i]->get_serialization(*info_tuple.get_element_info(i)); +// serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name); +// } + +// return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names); +// } + +// MutableSerializationInfoPtr DataTypeStruct::create_serialization_info( +// const SerializationInfo::Settings& settings) const { +// MutableSerializationInfos infos; +// infos.reserve(elems.size()); +// for (const auto& elem : elems) { +// infos.push_back(elem->create_serializationInfo(settings)); +// } + +// return std::make_shared<SerializationInfoTuple>(std::move(infos), names, settings); +// } + +// SerializationInfoPtr DataTypeStruct::get_serialization_info(const IColumn& column) const { +// if (const auto* column_const = check_and_get_column<ColumnConst>(&column)) { +// return get_serialization_info(column_const->get_data_column()); +// } + +// MutableSerializationInfos infos; +// infos.reserve(elems.size()); + +// const auto& column_tuple = assert_cast<const ColumnStruct&>(column); +// assert(elems.size() == column_tuple.get_columns().size()); + +// 
for (size_t i = 0; i < elems.size(); ++i) { +// auto element_info = elems[i]->get_serialization_info(column_tuple.getColumn(i)); +// infos.push_back(const_pointer_cast<SerializationInfo>(element_info)); +// } + +// return std::make_shared<SerializationInfoTuple>(std::move(infos), names, +// SerializationInfo::Settings {}); +// } + +// static DataTypePtr create(const ASTPtr& arguments) { +// if (!arguments || arguments->children.empty()) +// throw Exception("Struct cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); + +// DataTypes nested_types; +// nested_types.reserve(arguments->children.size()); + +// Strings names; +// names.reserve(arguments->children.size()); + +// for (const ASTPtr& child : arguments->children) { +// if (const auto* name_and_type_pair = child->as<ASTNameTypePair>()) { +// nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type)); +// names.emplace_back(name_and_type_pair->name); +// } else +// nested_types.emplace_back(DataTypeFactory::instance().get(child)); +// } + +// if (names.empty()) +// return std::make_shared<DataTypeStruct>(nested_types); +// else if (names.size() != nested_types.size()) +// throw Exception("Names are specified not for all elements of Struct type", +// ErrorCodes::BAD_ARGUMENTS); +// else +// return std::make_shared<DataTypeStruct>(nested_types, names); +// } + +// void registerDataTypeStruct(DataTypeFactory& factory) { +// factory.registerDataType("Struct", create); +// } + +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h new file mode 100644 index 0000000000..4201583ef4 --- /dev/null +++ b/be/src/vec/data_types/data_type_struct.h @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeTuple.h +// and modified by Doris + +#pragma once + +#include <exception> + +#include "gen_cpp/data.pb.h" +#include "util/stack_util.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_struct.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" + +namespace doris::vectorized { + +/** Struct data type. + * Used as an intermediate result when evaluating expressions. + * Also can be used as a column - the result of the query execution. + * + * Struct elements can have names. + * If an element is unnamed, it will have automatically assigned name like '1', '2', '3' corresponding to its position. + * Manually assigned names must not begin with digit. Names must be unique. + * + * All tuples with same size and types of elements are equivalent for expressions, regardless to names of elements. 
+ */ +class DataTypeStruct final : public IDataType { +private: + // using DataTypePtr = std::shared_ptr<const IDataType>; + // using DataTypes = std::vector<DataTypePtr>; + // using Strings = std::vector<std::string>; + + DataTypes elems; + Strings names; + bool have_explicit_names; + +public: + // static constexpr bool is_parametric = true; + + explicit DataTypeStruct(const DataTypes& elems); + DataTypeStruct(const DataTypes& elems, const Strings& names); + + TypeIndex get_type_id() const override { return TypeIndex::Struct; } + std::string do_get_name() const override; + const char* get_family_name() const override { return "Struct"; } + + bool can_be_inside_nullable() const override { return false; } + bool supports_sparse_serialization() const { return true; } + + MutableColumnPtr create_column() const override; + // MutableColumnPtr create_column(const ISerialization& serialization) const override; + + Field get_default() const override; + void insert_default_into(IColumn& column) const override; + + bool equals(const IDataType& rhs) const override; + + bool get_is_parametric() const override { return true; } + bool have_subtypes() const override { return !elems.empty(); } + bool is_comparable() const override; + bool text_can_contain_only_valid_utf8() const override; + bool have_maximum_size_of_value() const override; + bool has_dynamic_subcolumns() const; + size_t get_maximum_size_of_value_in_memory() const override; + size_t get_size_of_value_in_memory() const override; + + const DataTypePtr& get_element(size_t i) const { return elems[i]; } + const DataTypes& get_elements() const { return elems; } + const Strings& get_element_names() const { return names; } + + size_t get_position_by_name(const String& name) const; + std::optional<size_t> try_get_position_by_name(const String& name) const; + String get_name_by_position(size_t i) const; + + int64_t get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const override { + LOG(FATAL) 
<< "get_uncompressed_serialized_bytes not implemented"; + } + + char* serialize(const IColumn& column, char* buf, int be_exec_version) const override { + LOG(FATAL) << "serialize not implemented"; + } + + const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override { + LOG(FATAL) << "serialize not implemented"; + } + + // bool is_parametric() const { return true; } + // SerializationPtr do_get_default_serialization() const override; + // SerializationPtr get_serialization(const SerializationInfo& info) const override; + // MutableSerializationInfoPtr create_serialization_info( + // const SerializationInfo::Settings& settings) const override; + // SerializationInfoPtr get_serialization_info(const IColumn& column) const override; + // bool have_explicit_names() const { return have_explicit_names; } +}; + +} // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org