This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch struct-type in repository https://gitbox.apache.org/repos/asf/doris.git
commit 550e5b23f06b41ba9b3b61b82dc316c27112c03e Author: xy720 <22125576+xy...@users.noreply.github.com> AuthorDate: Thu Jan 12 14:40:08 2023 +0800 [WIP](struct-type) support struct-type in vectorize engine (#15665) --- be/src/olap/aggregate_func.cpp | 2 + be/src/olap/aggregate_func.h | 15 ++ be/src/olap/field.h | 57 ++++- be/src/olap/olap_define.h | 3 + be/src/olap/rowset/segment_v2/column_reader.cpp | 112 ++++++++++ be/src/olap/rowset/segment_v2/column_reader.h | 38 ++++ be/src/olap/rowset/segment_v2/column_writer.cpp | 174 +++++++++++++++ be/src/olap/rowset/segment_v2/column_writer.h | 51 +++++ be/src/olap/rowset/segment_v2/segment_writer.cpp | 9 + be/src/olap/tablet_meta.cpp | 8 +- be/src/olap/tablet_schema.cpp | 20 +- be/src/olap/types.cpp | 52 ++++- be/src/olap/types.h | 245 ++++++++++++++++++++- be/src/runtime/CMakeLists.txt | 1 + be/src/runtime/primitive_type.cpp | 15 ++ be/src/runtime/primitive_type.h | 1 + be/src/runtime/struct_value.cpp | 27 +++ be/src/runtime/struct_value.h | 67 ++++++ be/src/runtime/types.cpp | 48 +++- be/src/runtime/types.h | 2 +- be/src/udf/udf.h | 1 + be/src/vec/CMakeLists.txt | 1 + be/src/vec/columns/column_struct.cpp | 34 +-- be/src/vec/columns/column_struct.h | 15 +- be/src/vec/data_types/data_type_factory.cpp | 41 +++- be/src/vec/data_types/data_type_factory.hpp | 4 + be/src/vec/data_types/data_type_struct.cpp | 33 ++- be/src/vec/data_types/data_type_struct.h | 12 +- be/src/vec/exprs/vcast_expr.cpp | 1 + be/src/vec/exprs/vexpr.cpp | 5 + be/src/vec/exprs/vstruct_literal.cpp | 37 ++++ be/src/vec/exprs/vstruct_literal.h | 34 +++ be/src/vec/functions/function_cast.h | 1 + be/src/vec/olap/olap_data_convertor.cpp | 62 ++++++ be/src/vec/olap/olap_data_convertor.h | 21 ++ be/src/vec/sink/vmysql_result_writer.cpp | 100 +++++++-- be/src/vec/sink/vmysql_result_writer.h | 2 +- be/src/vec/sink/vtablet_sink.cpp | 3 +- fe/fe-core/src/main/cup/sql_parser.cup | 33 ++- .../java/org/apache/doris/analysis/ColumnDef.java | 4 + .../org/apache/doris/analysis/CreateTableStmt.java | 12 +- .../main/java/org/apache/doris/analysis/Expr.java | 7 +- .../org/apache/doris/analysis/StructLiteral.java | 150 +++++++++++++ .../java/org/apache/doris/analysis/Subquery.java | 2 +- .../java/org/apache/doris/analysis/TypeDef.java | 47 ++-- .../java/org/apache/doris/catalog/ArrayType.java | 10 + .../main/java/org/apache/doris/catalog/Column.java | 66 +++--- .../java/org/apache/doris/catalog/MapType.java | 5 + .../org/apache/doris/catalog/PrimitiveType.java | 5 +- .../java/org/apache/doris/catalog/StructField.java | 55 ++++- .../java/org/apache/doris/catalog/StructType.java | 94 +++++++- .../main/java/org/apache/doris/catalog/Type.java | 24 +- fe/fe-core/src/main/jflex/sql_scanner.flex | 4 + gensrc/proto/types.proto | 1 + gensrc/thrift/Exprs.thrift | 1 + gensrc/thrift/Types.thrift | 1 + 56 files changed, 1702 insertions(+), 173 deletions(-) diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index d943e94015..e8b45b52ab 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -144,6 +144,8 @@ AggregateFuncResolver::AggregateFuncResolver() { OLAP_FIELD_TYPE_DECIMAL128I>(); add_aggregate_mapping<OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY, OLAP_FIELD_TYPE_ARRAY>(); + // struct types + add_aggregate_mapping<OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_STRUCT>(); // Min Aggregate Function add_aggregate_mapping<OLAP_FIELD_AGGREGATION_MIN, OLAP_FIELD_TYPE_TINYINT>(); diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 5a397b658f..b72be9a7cd 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -107,6 +107,21 @@ struct BaseAggregateFuncs { static void finalize(RowCursorCell* src, MemPool* mem_pool) {} }; +// Define an empty trait for struct type because +// we now only support struct type in dup key table. +template <FieldType sub_type> +struct BaseAggregateFuncs<OLAP_FIELD_TYPE_STRUCT, sub_type> { + // Default init do nothing, use direct_copy in struct field instead. + static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, + ObjectPool* agg_pool) {} + + // Default update do nothing. + static void update(RowCursorCell* dst, const RowCursorCell& src, MemPool* mem_pool) {} + + // Default finalize do nothing. + static void finalize(RowCursorCell* src, MemPool* mem_pool) {} +}; + template <FieldType sub_type> struct BaseAggregateFuncs<OLAP_FIELD_TYPE_ARRAY, sub_type> { static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 391f7f45be..97a516f32c 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -49,7 +49,9 @@ public: _index_size(column.index_length()), _is_nullable(column.is_nullable()), _unique_id(column.unique_id()) { - if (column.type() == OLAP_FIELD_TYPE_ARRAY) { + if (column.type() == OLAP_FIELD_TYPE_STRUCT) { + _agg_info = get_aggregate_info(column.aggregation(), column.type()); + } else if (column.type() == OLAP_FIELD_TYPE_ARRAY) { _agg_info = get_aggregate_info(column.aggregation(), column.type(), column.get_sub_column(0).type()); } else { @@ -311,6 +313,9 @@ protected: const AggregateInfo* _agg_info; // unit : byte // except for strings, other types have fixed lengths + // Note that, the struct type itself has fixed length, but due to + // its number of subfields is a variable, so the actual length of + // a struct field is not fixed. uint32_t _length; // Since the length of the STRING type cannot be determined, // only dynamic memory can be used. Mempool cannot realize realloc. @@ -450,6 +455,38 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } +class StructField : public Field { +public: + explicit StructField(const TabletColumn& column) : Field(column) {} + + void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, + ObjectPool* agg_pool) const override { + dst->set_is_null(src_null); + if (src_null) { + return; + } + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } + + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + auto struct_v = (StructValue*)cell_ptr; + struct_v->set_values(reinterpret_cast<void**>(variable_ptr)); + variable_ptr += _length; + for (size_t i = 0; i < get_sub_field_count(); i++) { + variable_ptr += get_sub_field(i)->get_variable_len(); + } + return variable_ptr; + } + + size_t get_variable_len() const override { + size_t variable_len = _length; + for (size_t i = 0; i < get_sub_field_count(); i++) { + variable_len += get_sub_field(i)->get_variable_len(); + } + return variable_len; + } +}; + class ArrayField : public Field { public: explicit ArrayField(const TabletColumn& column) : Field(column) {} @@ -746,6 +783,15 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_STRING: return new StringField(column); + case OLAP_FIELD_TYPE_STRUCT: { + auto* local = new StructField(column); + for (uint32_t i = 0; i < column.get_subtype_count(); i++) { + std::unique_ptr<Field> sub_field( + FieldFactory::create(column.get_sub_column(i))); + local->add_sub_field(std::move(sub_field)); + } + return local; + } case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); auto* local = new ArrayField(column); @@ -786,6 +832,15 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_STRING: return new StringField(column); + case OLAP_FIELD_TYPE_STRUCT: { + auto* local = new StructField(column); + for (uint32_t i = 0; i < column.get_subtype_count(); i++) { + std::unique_ptr<Field> sub_field( + FieldFactory::create(column.get_sub_column(i))); + local->add_sub_field(std::move(sub_field)); + } + return local; + } case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); auto* local = new ArrayField(column); diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index a9c92c69e2..fadbea6834 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -60,6 +60,9 @@ static const uint32_t OLAP_STRING_MAX_LENGTH = 2147483647; // the max length supported for jsonb type 2G static const uint32_t OLAP_JSONB_MAX_LENGTH = 2147483647; +// the max length supported for struct, but excluding the length of its subtypes. +static const uint16_t OLAP_STRUCT_MAX_LENGTH = 65535; + // the max length supported for array static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 6f49aa23cf..968caae9c5 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -31,6 +31,7 @@ #include "util/rle_encoding.h" // for RleDecoder #include "vec/columns/column.h" #include "vec/columns/column_array.h" +#include "vec/columns/column_struct.h" #include "vec/core/types.h" #include "vec/runtime/vdatetime_value.h" //for VecDateTime @@ -49,6 +50,23 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& } else { auto type = (FieldType)meta.type(); switch (type) { + case FieldType::OLAP_FIELD_TYPE_STRUCT: { + // not support empty struct + DCHECK(meta.children_columns_size() >= 1); + // create struct column reader + std::unique_ptr<ColumnReader> struct_reader( + new ColumnReader(opts, meta, num_rows, file_reader)); + struct_reader->_sub_readers.reserve(meta.children_columns_size()); + for (size_t i = 0; i < meta.children_columns_size(); i++) { + std::unique_ptr<ColumnReader> sub_reader; + RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(i), + meta.children_columns(i).num_rows(), + file_reader, &sub_reader)); + struct_reader->_sub_readers.push_back(std::move(sub_reader)); + } + *reader = std::move(struct_reader); + return Status::OK(); + } case FieldType::OLAP_FIELD_TYPE_ARRAY: { DCHECK(meta.children_columns_size() == 2 || meta.children_columns_size() == 3); @@ -433,6 +451,24 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { } else { auto type = (FieldType)_meta.type(); switch (type) { + case FieldType::OLAP_FIELD_TYPE_STRUCT: { + std::vector<ColumnIterator*> sub_column_iterators; + size_t child_size = is_nullable() ? _sub_readers.size() - 1 : _sub_readers.size(); + sub_column_iterators.reserve(child_size); + + ColumnIterator* sub_column_iterator; + for (size_t i = 0; i < child_size; i++) { + RETURN_IF_ERROR(_sub_readers[i]->new_iterator(&sub_column_iterator)); + sub_column_iterators.push_back(sub_column_iterator); + } + + ColumnIterator* null_iterator = nullptr; + if (is_nullable()) { + RETURN_IF_ERROR(_sub_readers[child_size]->new_iterator(&null_iterator)); + } + *iterator = new StructFileColumnIterator(this, null_iterator, sub_column_iterators); + return Status::OK(); + } case FieldType::OLAP_FIELD_TYPE_ARRAY: { ColumnIterator* item_iterator = nullptr; RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&item_iterator)); @@ -458,6 +494,82 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { //////////////////////////////////////////////////////////////////////////////// +StructFileColumnIterator::StructFileColumnIterator( + ColumnReader* reader, ColumnIterator* null_iterator, + std::vector<ColumnIterator*>& sub_column_iterators) + : _struct_reader(reader) { + _sub_column_iterators.resize(sub_column_iterators.size()); + for (size_t i = 0; i < sub_column_iterators.size(); i++) { + _sub_column_iterators[i].reset(sub_column_iterators[i]); + } + if (_struct_reader->is_nullable()) { + _null_iterator.reset(null_iterator); + } +} + +Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) { + for (auto& column_iterator : _sub_column_iterators) { + RETURN_IF_ERROR(column_iterator->init(opts)); + } + if (_struct_reader->is_nullable()) { + RETURN_IF_ERROR(_null_iterator->init(opts)); + } + return Status::OK(); +} + +Status StructFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { + return Status::NotSupported("not supported"); +} + +Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + const auto* column_struct = vectorized::check_and_get_column<vectorized::ColumnStruct>( + dst->is_nullable() ? static_cast<vectorized::ColumnNullable&>(*dst).get_nested_column() + : *dst); + for (size_t i = 0; i < column_struct->tuple_size(); i++) { + size_t num_read = *n; + auto sub_column_ptr = column_struct->get_column(i).assume_mutable(); + bool column_has_null = false; + RETURN_IF_ERROR( + _sub_column_iterators[i]->next_batch(&num_read, sub_column_ptr, &column_has_null)); + DCHECK(num_read == *n); + } + + if (dst->is_nullable()) { + size_t num_read = *n; + auto null_map_ptr = + static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr(); + bool null_signs_has_null = false; + RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, &null_signs_has_null)); + DCHECK(num_read == *n); + } + + return Status::OK(); +} + +Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { + for (auto& column_iterator : _sub_column_iterators) { + RETURN_IF_ERROR(column_iterator->seek_to_ordinal(ord)); + } + if (_struct_reader->is_nullable()) { + RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord)); + } + return Status::OK(); +} + +Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) { + for (size_t i = 0; i < count; ++i) { + RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); + size_t num_read = 1; + RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr)); + DCHECK(num_read == 1); + } + return Status::OK(); +} + +//////////////////////////////////////////////////////////////////////////////// + ArrayFileColumnIterator::ArrayFileColumnIterator(ColumnReader* reader, FileColumnIterator* offset_reader, ColumnIterator* item_iterator, diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index c5dd729d82..b10e46ea98 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -393,6 +393,44 @@ public: ordinal_t get_current_ordinal() const override { return 0; } }; +class StructFileColumnIterator final : public ColumnIterator { +public: + explicit StructFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator, + std::vector<ColumnIterator*>& sub_column_iterators); + + ~StructFileColumnIterator() override = default; + + Status init(const ColumnIteratorOptions& opts) override; + + Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) override; + + Status seek_to_first() override { + for (auto& column_iterator : _sub_column_iterators) { + RETURN_IF_ERROR(column_iterator->seek_to_first()); + } + if (_struct_reader->is_nullable()) { + RETURN_IF_ERROR(_null_iterator->seek_to_first()); + } + return Status::OK(); + } + + Status seek_to_ordinal(ordinal_t ord) override; + + ordinal_t get_current_ordinal() const override { + return _sub_column_iterators[0]->get_current_ordinal(); + } + +private: + ColumnReader* _struct_reader; + std::unique_ptr<ColumnIterator> _null_iterator; + std::vector<std::unique_ptr<ColumnIterator>> _sub_column_iterators; +}; + class ArrayFileColumnIterator final : public ColumnIterator { public: explicit ArrayFileColumnIterator(ColumnReader* reader, FileColumnIterator* offset_reader, diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index df820b141e..fdd42f9444 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -88,6 +88,78 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* return Status::OK(); } else { switch (column->type()) { + case FieldType::OLAP_FIELD_TYPE_STRUCT: { + // not support empty struct + DCHECK(column->get_subtype_count() >= 1); + std::vector<std::unique_ptr<ColumnWriter>> sub_column_writers; + sub_column_writers.reserve(column->get_subtype_count()); + for (uint32_t i = 0; i < column->get_subtype_count(); i++) { + const TabletColumn& sub_column = column->get_sub_column(i); + + // create sub writer + ColumnWriterOptions column_options; + column_options.meta = opts.meta->mutable_children_columns(i); + column_options.need_zone_map = false; + column_options.need_bloom_filter = sub_column.is_bf_column(); + column_options.need_bitmap_index = sub_column.has_bitmap_index(); + column_options.inverted_index = nullptr; + if (sub_column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) { + if (column_options.need_bloom_filter) { + return Status::NotSupported("Do not support bloom filter for struct type"); + } + if (column_options.need_bitmap_index) { + return Status::NotSupported("Do not support bitmap index for struct type"); + } + } + if (sub_column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { + if (column_options.need_bloom_filter) { + return Status::NotSupported("Do not support bloom filter for array type"); + } + if (column_options.need_bitmap_index) { + return Status::NotSupported("Do not support bitmap index for array type"); + } + } + std::unique_ptr<ColumnWriter> sub_column_writer; + RETURN_IF_ERROR(ColumnWriter::create(column_options, &sub_column, file_writer, + &sub_column_writer)); + sub_column_writers.push_back(std::move(sub_column_writer)); + } + + // if nullable, create null writer + ScalarColumnWriter* null_writer = nullptr; + if (opts.meta->is_nullable()) { + FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT; + ColumnWriterOptions null_options; + null_options.meta = opts.meta->add_children_columns(); + null_options.meta->set_column_id(column->get_subtype_count() + 1); + null_options.meta->set_unique_id(column->get_subtype_count() + 1); + null_options.meta->set_type(null_type); + null_options.meta->set_is_nullable(false); + null_options.meta->set_length( + get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size()); + null_options.meta->set_encoding(DEFAULT_ENCODING); + null_options.meta->set_compression(opts.meta->compression()); + + null_options.need_zone_map = false; + null_options.need_bloom_filter = false; + null_options.need_bitmap_index = false; + + TabletColumn null_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, null_type, null_options.meta->is_nullable(), + null_options.meta->unique_id(), null_options.meta->length()); + null_column.set_name("nullable"); + null_column.set_index_length(-1); // no short key index + std::unique_ptr<Field> null_field(FieldFactory::create(null_column)); + null_writer = + new ScalarColumnWriter(null_options, std::move(null_field), file_writer); + } + + std::unique_ptr<ColumnWriter> writer_local = + std::unique_ptr<ColumnWriter>(new StructColumnWriter( + opts, std::move(field), null_writer, sub_column_writers)); + *writer = std::move(writer_local); + return Status::OK(); + } case FieldType::OLAP_FIELD_TYPE_ARRAY: { DCHECK(column->get_subtype_count() == 1); const TabletColumn& item_column = column->get_sub_column(0); @@ -542,6 +614,108 @@ Status ScalarColumnWriter::finish_current_page() { //////////////////////////////////////////////////////////////////////////////// +StructColumnWriter::StructColumnWriter( + const ColumnWriterOptions& opts, std::unique_ptr<Field> field, + ScalarColumnWriter* null_writer, + std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers) + : ColumnWriter(std::move(field), opts.meta->is_nullable()), _opts(opts) { + for (auto& sub_column_writer : sub_column_writers) { + _sub_column_writers.push_back(std::move(sub_column_writer)); + } + _num_sub_column_writers = _sub_column_writers.size(); + DCHECK(_num_sub_column_writers >= 1); + if (is_nullable()) { + _null_writer.reset(null_writer); + } +} + +Status StructColumnWriter::init() { + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->init()); + } + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->init()); + } + return Status::OK(); +} + +Status StructColumnWriter::write_inverted_index() { + if (_opts.inverted_index) { + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->write_inverted_index()); + } + } + return Status::OK(); +} + +Status StructColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr, + size_t num_rows) { + RETURN_IF_ERROR(append_data(ptr, num_rows)); + RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows)); + return Status::OK(); +} + +Status StructColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { + auto data_cursor = reinterpret_cast<const void**>(ptr); + auto null_map_cursor = data_cursor + _num_sub_column_writers; + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->append(reinterpret_cast<const uint8_t*>(*null_map_cursor), + *data_cursor, num_rows)); + data_cursor++; + null_map_cursor++; + } + return Status::OK(); +} + +uint64_t StructColumnWriter::estimate_buffer_size() { + uint64_t size = 0; + for (auto& column_writer : _sub_column_writers) { + size += column_writer->estimate_buffer_size(); + } + size += is_nullable() ? _null_writer->estimate_buffer_size() : 0; + return size; +} + +Status StructColumnWriter::finish() { + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->finish()); + } + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->finish()); + } + return Status::OK(); +} + +Status StructColumnWriter::write_data() { + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->write_data()); + } + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->write_data()); + } + return Status::OK(); +} + +Status StructColumnWriter::write_ordinal_index() { + for (auto& column_writer : _sub_column_writers) { + RETURN_IF_ERROR(column_writer->write_ordinal_index()); + } + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->write_ordinal_index()); + } + return Status::OK(); +} + +Status StructColumnWriter::append_nulls(size_t num_rows) { + return Status::NotSupported("struct writer not support append nulls"); +} + +Status StructColumnWriter::finish_current_page() { + return Status::NotSupported("struct writer has no data, can not finish_current_page"); +} + +//////////////////////////////////////////////////////////////////////////////// + ArrayColumnWriter::ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<Field> field, ScalarColumnWriter* offset_writer, ScalarColumnWriter* null_writer, diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 5ea7ae654c..007a69a6d9 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -265,6 +265,57 @@ private: FlushPageCallback* _new_page_callback = nullptr; }; +class StructColumnWriter final : public ColumnWriter { +public: + explicit StructColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<Field> field, + ScalarColumnWriter* null_writer, + std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers); + ~StructColumnWriter() override = default; + + Status init() override; + + Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows); + Status append_data(const uint8_t** ptr, size_t num_rows) override; + + uint64_t estimate_buffer_size() override; + + Status finish() override; + Status write_data() override; + Status write_ordinal_index() override; + Status append_nulls(size_t num_rows) override; + + Status finish_current_page() override; + + Status write_zone_map() override { + if (_opts.need_zone_map) { + return Status::NotSupported("struct not support zone map"); + } + return Status::OK(); + } + + Status write_bitmap_index() override { + if (_opts.need_bitmap_index) { + return Status::NotSupported("struct not support bitmap index"); + } + return Status::OK(); + } + Status write_inverted_index() override; + Status write_bloom_filter_index() override { + if (_opts.need_bloom_filter) { + return Status::NotSupported("struct not support bloom filter index"); + } + return Status::OK(); + } + + ordinal_t get_next_rowid() const override { return _sub_column_writers[0]->get_next_rowid(); } + +private: + size_t _num_sub_column_writers; + std::unique_ptr<ScalarColumnWriter> _null_writer; + std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers; + ColumnWriterOptions _opts; +}; + class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<Field> field, diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index df7f26c34f..28ef07d40d 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -135,6 +135,15 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) { break; } } + if (column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) { + opts.need_zone_map = false; + if (opts.need_bloom_filter) { + return Status::NotSupported("Do not support bloom filter for struct type"); + } + if (opts.need_bitmap_index) { + return Status::NotSupported("Do not support bitmap index for struct type"); + } + } if (column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { opts.need_zone_map = false; if (opts.need_bloom_filter) { diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index e3d4c6f3ff..24b5460549 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -295,8 +295,12 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco if (tcolumn.__isset.is_bloom_filter_column) { column->set_is_bf_column(tcolumn.is_bloom_filter_column); } - - if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { + if (tcolumn.column_type.type == TPrimitiveType::STRUCT) { + for (size_t i = 0; i < tcolumn.children_column.size(); i++) { + ColumnPB* children_column = column->add_children_columns(); + init_column_from_tcolumn(i, tcolumn.children_column[i], children_column); + } + } else if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { ColumnPB* children_column = column->add_children_columns(); init_column_from_tcolumn(0, tcolumn.children_column[0], children_column); } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 55f030cbb9..9fe92eb6b4 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -310,6 +310,10 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 return string_length + sizeof(OLAP_STRING_MAX_LENGTH); case TPrimitiveType::JSONB: return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); + case TPrimitiveType::STRUCT: + // Note that(xy): this is the length of struct type itself, + // the length of its subtypes are not included. + return OLAP_STRUCT_MAX_LENGTH; case TPrimitiveType::ARRAY: return OLAP_ARRAY_MAX_LENGTH; case TPrimitiveType::DECIMAL32: @@ -402,8 +406,13 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { if (column.has_visible()) { _visible = column.visible(); } - - if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { + if (_type == FieldType::OLAP_FIELD_TYPE_STRUCT) { + for (size_t i = 0; i < column.children_columns_size(); i++) { + TabletColumn child_column; + child_column.init_from_pb(column.children_columns(i)); + add_sub_column(child_column); + } + } else if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { DCHECK(column.children_columns_size() == 1) << "ARRAY type has more than 1 children types."; TabletColumn child_column; child_column.init_from_pb(column.children_columns(0)); @@ -435,7 +444,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { } column->set_visible(_visible); - if (_type == OLAP_FIELD_TYPE_ARRAY) { + if (_type == OLAP_FIELD_TYPE_STRUCT) { + for (size_t i = 0; i < _sub_columns.size(); i++) { + ColumnPB* child = column->add_children_columns(); + _sub_columns[i].to_schema_pb(child); + } + } else if (_type == OLAP_FIELD_TYPE_ARRAY) { DCHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1 children types."; ColumnPB* child = column->add_children_columns(); _sub_columns[0].to_schema_pb(child); diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index aa2226a84a..916f7768af 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -166,10 +166,33 @@ const TypeInfo* get_array_type_info(FieldType leaf_type, int32_t iterations) { return array_type_Info_arr[leaf_type][iterations]; } +// Produce a struct type info +// TODO(xy): Need refactor to this produce method +const TypeInfo* get_struct_type_info(std::vector<FieldType> field_types) { + std::vector<TypeInfoPtr> type_infos; + type_infos.reserve(field_types.size()); + for (FieldType& type : field_types) { + if (is_scalar_type(type)) { + type_infos.push_back(create_static_type_info_ptr(get_scalar_type_info(type))); + } else { + // TODO(xy): Not supported nested complex type now + } + } + return new StructTypeInfo(type_infos); +} + // TODO: Support the type info of the nested array with more than 9 depths. +// TODO(xy): Support the type info of the nested struct TypeInfoPtr get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { FieldType type = (FieldType)column_meta_pb->type(); - if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) { + if (UNLIKELY(type == OLAP_FIELD_TYPE_STRUCT)) { + std::vector<FieldType> field_types; + for (uint32_t i = 0; i < column_meta_pb->children_columns_size(); i++) { + const auto* child_column = &column_meta_pb->children_columns(i); + field_types.push_back((FieldType)child_column->type()); + } + return create_dynamic_type_info_ptr(get_struct_type_info(field_types)); + } else if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) { int32_t iterations = 0; const auto* child_column = &column_meta_pb->children_columns(0); while (child_column->type() == OLAP_FIELD_TYPE_ARRAY) { @@ -202,7 +225,14 @@ TypeInfoPtr create_type_info_ptr(const TypeInfo* type_info, bool should_reclaim_ // TODO: Support the type info of the nested array with more than 9 depths. TypeInfoPtr get_type_info(const TabletColumn* col) { auto type = col->type(); - if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) { + if (UNLIKELY(type == OLAP_FIELD_TYPE_STRUCT)) { + std::vector<FieldType> field_types; + for (uint32_t i = 0; i < col->get_subtype_count(); i++) { + const auto* child_column = &col->get_sub_column(i); + field_types.push_back(child_column->type()); + } + return create_dynamic_type_info_ptr(get_struct_type_info(field_types)); + } else if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) { int32_t iterations = 0; const auto* child_column = &col->get_sub_column(0); while (child_column->type() == OLAP_FIELD_TYPE_ARRAY) { @@ -219,9 +249,21 @@ TypeInfoPtr clone_type_info(const TypeInfo* type_info) { if (is_scalar_type(type_info->type())) { return create_static_type_info_ptr(type_info); } else { - const auto array_type_info = dynamic_cast<const ArrayTypeInfo*>(type_info); - return create_dynamic_type_info_ptr( - new ArrayTypeInfo(clone_type_info(array_type_info->item_type_info()))); + auto type = type_info->type(); + if (type == OLAP_FIELD_TYPE_STRUCT) { + const auto struct_type_info = dynamic_cast<const StructTypeInfo*>(type_info); + std::vector<TypeInfoPtr> clone_type_infos; + const std::vector<TypeInfoPtr>* sub_type_infos = struct_type_info->type_infos(); + clone_type_infos.reserve(sub_type_infos->size()); + for (size_t i = 0; i < sub_type_infos->size(); i++) { + clone_type_infos.push_back(clone_type_info((*sub_type_infos)[i].get())); + } + return create_dynamic_type_info_ptr(new StructTypeInfo(clone_type_infos)); + } else { + const auto array_type_info = dynamic_cast<const ArrayTypeInfo*>(type_info); + return create_dynamic_type_info_ptr( + new ArrayTypeInfo(clone_type_info(array_type_info->item_type_info()))); + } } } diff --git a/be/src/olap/types.h b/be/src/olap/types.h index c580f2ea53..a8f21b66ae 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -32,6 +32,7 @@ #include "runtime/collection_value.h" #include "runtime/jsonb_value.h" #include "runtime/mem_pool.h" +#include "runtime/struct_value.h" #include "util/jsonb_document.h" #include "util/jsonb_utils.h" #include "util/mem_util.hpp" @@ -431,6 +432,245 @@ private: const size_t _item_size; }; +class StructTypeInfo : public TypeInfo { +public: + explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) { + for (TypeInfoPtr& type_info : type_infos) { + _type_infos.push_back(std::move(type_info)); + } + } + ~StructTypeInfo() override = default; + + bool equal(const void* left, const void* right) const override { + auto l_value = reinterpret_cast<const StructValue*>(left); + auto r_value = reinterpret_cast<const StructValue*>(right); + if (l_value->size() != r_value->size()) { + return false; + } + uint32_t size = l_value->size(); + + if (!l_value->has_null() && !r_value->has_null()) { + for (size_t i = 0; i < size; ++i) { + if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) { + return false; + } + } + } else { + for (size_t i = 0; i < size; ++i) { + if (l_value->is_null_at(i)) { + if (r_value->is_null_at(i)) { // both are null + continue; + } else { // left is null & right is not null + return false; + } + } else if (r_value->is_null_at(i)) { // left is not null & right is null + return false; + } + if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) { + return false; + } + } + } + return true; + } + + int cmp(const void* left, const void* right) const override { + auto l_value = reinterpret_cast<const StructValue*>(left); + auto r_value = reinterpret_cast<const StructValue*>(right); + uint32_t l_size = l_value->size(); + uint32_t r_size = r_value->size(); + size_t cur = 0; + + if (!l_value->has_null() && !r_value->has_null()) { + while (cur < l_size && cur < r_size) { + int result = + _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur)); + if (result != 0) { + return result; + } + ++cur; + } + } else { + while (cur < l_size && cur < r_size) { + if (l_value->is_null_at(cur)) { + if (!r_value->is_null_at(cur)) { // left is null & right is not null + return -1; + } + } else if (r_value->is_null_at(cur)) { // left is not null & right is null + return 1; + } else { // both are not null + int result = _type_infos[cur]->cmp(l_value->child_value(cur), + r_value->child_value(cur)); + if (result != 0) { + return result; + } + } + ++cur; + } + } + + if (l_size < r_size) { + return -1; + } else if (l_size > r_size) { + return 1; + } else { + return 0; + } + } + + void shallow_copy(void* dest, const void* src) const override { + auto dest_value = reinterpret_cast<StructValue*>(dest); + auto src_value = reinterpret_cast<const StructValue*>(src); + dest_value->shallow_copy(src_value); + } + + void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { + auto dest_value = reinterpret_cast<StructValue*>(dest); + auto src_value = reinterpret_cast<const StructValue*>(src); + + if (src_value->size() == 0) { + new (dest_value) StructValue(src_value->size()); + return; + } + + dest_value->set_size(src_value->size()); + dest_value->set_has_null(src_value->has_null()); + + size_t allocate_size = src_value->size() * sizeof(*src_value->values()); + // allocate memory for children value + for (size_t i = 0; i < src_value->size(); ++i) { + if (src_value->is_null_at(i)) continue; + allocate_size += _type_infos[i]->size(); + } + + dest_value->set_values((void**)mem_pool->allocate(allocate_size)); + auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values()); + ptr += dest_value->size() * sizeof(*dest_value->values()); + + for (size_t i = 0; i < src_value->size(); ++i) { + dest_value->set_child_value(nullptr, i); + if (src_value->is_null_at(i)) continue; + dest_value->set_child_value(ptr, i); + ptr += _type_infos[i]->size(); + } + + // copy children value + for (size_t i = 0; i < src_value->size(); ++i) { + if (src_value->is_null_at(i)) continue; + _type_infos[i]->deep_copy(dest_value->mutable_child_value(i), src_value->child_value(i), + mem_pool); + } + } + + void copy_object(void* dest, const void* src, MemPool* mem_pool) const override { + deep_copy(dest, src, mem_pool); + } + + void direct_copy(void* dest, const void* src) const override { + auto dest_value = static_cast<StructValue*>(dest); + auto base = reinterpret_cast<uint8_t*>(dest_value->mutable_values()); + direct_copy(&base, dest, src); + } + + void direct_copy(uint8_t** base, void* dest, const void* src) const { + auto dest_value = static_cast<StructValue*>(dest); + auto src_value = static_cast<const StructValue*>(src); + + dest_value->set_size(src_value->size()); + dest_value->set_has_null(src_value->has_null()); + *base += src_value->size() * sizeof(*src_value->values()); + + for (size_t i = 0; i < src_value->size(); ++i) { + dest_value->set_child_value(nullptr, i); + if (src_value->is_null_at(i)) continue; + dest_value->set_child_value(*base, i); + *base += _type_infos[i]->size(); + } + + for (size_t i = 0; i < src_value->size(); ++i) { + if (dest_value->is_null_at(i)) { + continue; + } + auto dest_address = dest_value->mutable_child_value(i); + auto src_address = src_value->child_value(i); + if (_type_infos[i]->type() == OLAP_FIELD_TYPE_STRUCT) { + dynamic_cast<const StructTypeInfo*>(_type_infos[i].get()) + ->direct_copy(base, dest_address, src_address); + } else if (_type_infos[i]->type() == OLAP_FIELD_TYPE_ARRAY) { + dynamic_cast<const ArrayTypeInfo*>(_type_infos[i].get()) + ->direct_copy(base, dest_address, src_address); + } else { + if (is_olap_string_type(_type_infos[i]->type())) { + auto dest_slice = reinterpret_cast<Slice*>(dest_address); + auto src_slice = reinterpret_cast<const Slice*>(src_address); + dest_slice->data = reinterpret_cast<char*>(*base); + dest_slice->size = src_slice->size; + *base += src_slice->size; + } + _type_infos[i]->direct_copy(dest_address, src_address); + } + } + } + + void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); } + + Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool, + size_t variable_len = 0) const override { + return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); + } + + Status from_string(void* buf, const std::string& scan_key, const int precision = 0, + const int scale = 0) const override { + return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); + } + + std::string to_string(const void* src) const override { + auto src_value = reinterpret_cast<const StructValue*>(src); + std::string result = "{"; + + for (size_t i = 0; i < src_value->size(); ++i) { + std::string field_value = _type_infos[i]->to_string(src_value->child_value(i)); + result += field_value; + if (i < src_value->size() - 1) { + result += ", "; + } + } + result += "}"; + return result; + } + + void set_to_max(void* buf) const override { + DCHECK(false) << "set_to_max of list is not implemented."; + } + + void set_to_min(void* buf) const override { + DCHECK(false) << "set_to_min of list is not implemented."; + } + + uint32_t hash_code(const void* data, uint32_t seed) const override { + auto struct_value = reinterpret_cast<const StructValue*>(data); + auto size = struct_value->size(); + uint32_t result = HashUtil::hash(&size, sizeof(size), seed); + for (size_t i = 0; i < size; ++i) { + if (struct_value->is_null_at(i)) { + result = seed * result; + } else { + result = seed * result + _type_infos[i]->hash_code(struct_value->values()[i], seed); + } + } + return result; + } + + const size_t size() const override { return sizeof(StructValue); } + + FieldType type() const override { return OLAP_FIELD_TYPE_STRUCT; } + + inline const std::vector<TypeInfoPtr>* type_infos() const { return &_type_infos; } + +private: + std::vector<TypeInfoPtr> _type_infos; +}; + bool is_scalar_type(FieldType field_type); const TypeInfo* get_scalar_type_info(FieldType field_type); @@ -566,12 +806,15 @@ template <> struct CppTypeTraits<OLAP_FIELD_TYPE_OBJECT> { using CppType = Slice; }; - template <> struct CppTypeTraits<OLAP_FIELD_TYPE_QUANTILE_STATE> { using CppType = Slice; }; template <> +struct CppTypeTraits<OLAP_FIELD_TYPE_STRUCT> { + using CppType = StructValue; +}; +template <> struct CppTypeTraits<OLAP_FIELD_TYPE_ARRAY> { using CppType = CollectionValue; }; diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index dfc375bd24..92ffc2071d 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -48,6 +48,7 @@ set(RUNTIME_FILES threadlocal.cc decimalv2_value.cpp large_int_value.cpp + struct_value.cpp collection_value.cpp tuple.cpp tuple_row.cpp diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 779676a45d..27bc9f5b6e 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -22,6 +22,7 @@ #include "runtime/define_primitive_type.h" #include "runtime/jsonb_value.h" #include "runtime/string_value.h" +#include "runtime/struct_value.h" namespace doris { @@ -53,6 +54,8 @@ PrimitiveType convert_type_to_primitive(FunctionContext::Type type) { return PrimitiveType::TYPE_BOOLEAN; case FunctionContext::Type::TYPE_ARRAY: return PrimitiveType::TYPE_ARRAY; + case FunctionContext::Type::TYPE_STRUCT: + return PrimitiveType::TYPE_STRUCT; case FunctionContext::Type::TYPE_OBJECT: return PrimitiveType::TYPE_OBJECT; case FunctionContext::Type::TYPE_HLL: @@ -95,6 +98,7 @@ int get_byte_size(PrimitiveType type) { case TYPE_HLL: case TYPE_QUANTILE_STATE: case TYPE_ARRAY: + case TYPE_STRUCT: case TYPE_MAP: return 0; @@ -262,6 +266,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::ARRAY: return TYPE_ARRAY; + case TPrimitiveType::STRUCT: + return TYPE_STRUCT; + default: return INVALID_TYPE; } @@ -356,6 +363,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_ARRAY: return TPrimitiveType::ARRAY; + case TYPE_STRUCT: + return TPrimitiveType::STRUCT; + default: return TPrimitiveType::INVALID_TYPE; } @@ -450,6 +460,9 @@ std::string type_to_string(PrimitiveType t) { case TYPE_ARRAY: return "ARRAY"; + case TYPE_STRUCT: + return "STRUCT"; + default: return ""; }; @@ -589,6 +602,8 @@ int get_slot_size(PrimitiveType type) { return sizeof(JsonBinaryValue); case TYPE_ARRAY: return sizeof(CollectionValue); + case TYPE_STRUCT: + return sizeof(StructValue); case TYPE_NULL: case TYPE_BOOLEAN: diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 5af7639290..f84f9a3348 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -54,6 +54,7 @@ constexpr bool is_enumeration_type(PrimitiveType type) { case TYPE_DECIMAL128I: case TYPE_BOOLEAN: case TYPE_ARRAY: + case TYPE_STRUCT: case TYPE_HLL: return false; case TYPE_TINYINT: diff --git a/be/src/runtime/struct_value.cpp b/be/src/runtime/struct_value.cpp new file mode 100644 index 0000000000..ff0b9cc62a --- /dev/null +++ b/be/src/runtime/struct_value.cpp @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/struct_value.h" + +namespace doris { + +void StructValue::shallow_copy(const StructValue* other) { + _size = other->_size; + _values = other->_values; + _has_null = other->_has_null; +} +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/struct_value.h b/be/src/runtime/struct_value.h new file mode 100644 index 0000000000..ec243d729c --- /dev/null +++ b/be/src/runtime/struct_value.h @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <type_traits> + +#include "runtime/primitive_type.h" + +namespace doris { + +class StructValue { +public: + StructValue() = default; + + explicit StructValue(uint32_t size) : _values(nullptr), _size(size), _has_null(false) {} + StructValue(void** values, uint32_t size) : _values(values), _size(size), _has_null(false) {} + StructValue(void** values, uint32_t size, bool has_null) + : _values(values), _size(size), _has_null(has_null) {} + + //void to_struct_val(StructVal* val) const; + //static StructValue from_struct_val(const StructVal& val); + + uint32_t size() const { return _size; } + void set_size(uint32_t size) { _size = size; } + bool has_null() const { return _has_null; } + void set_has_null(bool has_null) { _has_null = has_null; } + bool is_null_at(uint32_t index) const { + return this->_has_null && this->_values[index] == nullptr; + } + + void shallow_copy(const StructValue* other); + + // size_t get_byte_size(const TypeDescriptor& type) const; + + const void** values() const { return const_cast<const void**>(_values); } + void** mutable_values() { return _values; } + void set_values(void** values) { _values = values; } + const void* child_value(uint32_t index) const { return _values[index]; } + void* mutable_child_value(uint32_t index) { return _values[index]; } + void set_child_value(void* value, uint32_t index) { _values[index] = value; } + +private: + // pointer to the start of the vector of children pointers. These pointers are + // point to children values where a null pointer means that this child is NULL. + void** _values; + // the number of values in this struct value. + uint32_t _size; + // child has no null value if has_null is false. + // child may has null value if has_null is true. + bool _has_null; +}; +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 37f4933569..f26b1dcbce 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -55,14 +55,31 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) case TTypeNodeType::ARRAY: { DCHECK(!node.__isset.scalar_type); DCHECK_LT(*idx, types.size() - 1); + // contains_null should be always set in ArrayType + DCHECK(node.__isset.contains_null); type = TYPE_ARRAY; - if (node.__isset.contains_null) { - contains_null = node.contains_null; - } + contains_nulls.reserve(1); + contains_nulls.push_back(node.contains_null); ++(*idx); children.push_back(TypeDescriptor(types, idx)); break; } + case TTypeNodeType::STRUCT: { + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 1); + DCHECK(!node.__isset.contains_null); + DCHECK(node.__isset.struct_fields); + DCHECK_GE(node.struct_fields.size(), 1); + type = TYPE_STRUCT; + contains_nulls.reserve(node.struct_fields.size()); + for (size_t i = 0; i < node.struct_fields.size(); i++) { + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + field_names.push_back(node.struct_fields[i].name); + contains_nulls.push_back(node.struct_fields[i].contains_null); + } + break; + } // case TTypeNodeType::STRUCT: // type = TYPE_STRUCT; // for (int i = 0; i < node.struct_fields.size(); ++i) { @@ -98,15 +115,17 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { if (is_complex_type()) { if (type == TYPE_ARRAY) { node.type = TTypeNodeType::ARRAY; + node.contains_null = contains_nulls[0]; } else if (type == TYPE_MAP) { node.type = TTypeNodeType::MAP; } else { DCHECK_EQ(type, TYPE_STRUCT); node.type = TTypeNodeType::STRUCT; node.__set_struct_fields(std::vector<TStructField>()); - for (auto& field_name : field_names) { + for (size_t i = 0; i < field_names.size(); i++) { node.struct_fields.push_back(TStructField()); - node.struct_fields.back().name = field_name; + node.struct_fields.back().name = field_names[i]; + node.struct_fields.back().contains_null = contains_nulls[i]; } } for (const TypeDescriptor& child : children) { @@ -147,10 +166,12 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { scalar_type->set_scale(scale); } else if (type == TYPE_ARRAY) { node->set_type(TTypeNodeType::ARRAY); + node->set_contains_null(contains_nulls[0]); for (const TypeDescriptor& child : children) { child.to_protobuf(ptype); } } + // TODO(xy): support struct } TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNode>& types, int* idx) @@ -184,13 +205,15 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod } case TTypeNodeType::ARRAY: { type = TYPE_ARRAY; + contains_nulls.push_back(true); if (node.has_contains_null()) { - contains_null = node.contains_null(); + contains_nulls[0] = node.contains_null(); } ++(*idx); children.push_back(TypeDescriptor(types, idx)); break; } + // TODO(xy): support struct default: DCHECK(false) << node.type(); } @@ -218,6 +241,19 @@ std::string TypeDescriptor::debug_string() const { ss << "ARRAY<" << children[0].debug_string() << ">"; return ss.str(); } + case TYPE_STRUCT: { + ss << "STRUCT<"; + for (size_t i = 0; i < children.size(); i++) { + ss << field_names[i]; + ss << ":"; + ss << children[i].debug_string(); + if (i != children.size() - 1) { + ss << ","; + } + } + ss << ">"; + return ss.str(); + } default: return type_to_string(type); } diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index aca6336f6b..dec60926dd 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -64,7 +64,7 @@ struct TypeDescriptor { std::vector<std::string> field_names; // Used for complex types only. - bool contains_null = true; + std::vector<bool> contains_nulls; TypeDescriptor() : type(INVALID_TYPE), len(-1), precision(-1), scale(-1) {} diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 49758f40d1..ec60fae072 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -89,6 +89,7 @@ public: TYPE_DECIMALV2, TYPE_OBJECT, TYPE_ARRAY, + TYPE_STRUCT, TYPE_QUANTILE_STATE, TYPE_DATEV2, TYPE_DATETIMEV2, diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index be5b60c6dd..7a569ded2e 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -136,6 +136,7 @@ set(VEC_FILES exprs/vexpr_context.cpp exprs/vliteral.cpp exprs/varray_literal.cpp + exprs/vstruct_literal.cpp exprs/vin_predicate.cpp exprs/vbloom_predicate.cpp exprs/vbitmap_predicate.cpp diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 05d5df0fd4..c7e5c23d2a 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -48,8 +48,7 @@ ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) { columns.reserve(mutable_columns.size()); for (auto& column : mutable_columns) { if (is_column_const(*column)) { - throw Exception {"ColumnStruct cannot have ColumnConst as its element", - ErrorCodes::ILLEGAL_COLUMN}; + LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } columns.push_back(std::move(column)); } @@ -59,8 +58,7 @@ ColumnStruct::ColumnStruct(Columns&& columns) { columns.reserve(columns.size()); for (auto& column : columns) { if (is_column_const(*column)) { - throw Exception {"ColumnStruct cannot have ColumnConst as its element", - ErrorCodes::ILLEGAL_COLUMN}; + LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } columns.push_back(std::move(column)); } @@ -70,8 +68,7 @@ ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) { columns.reserve(tuple_columns.size()); for (auto& column : tuple_columns) { if (is_column_const(*column)) { - throw Exception {"ColumnStruct cannot have ColumnConst as its element", - ErrorCodes::ILLEGAL_COLUMN}; + LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } columns.push_back(std::move(column)); } @@ -79,9 +76,9 @@ ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) { ColumnStruct::Ptr ColumnStruct::create(Columns& columns) { for (const auto& column : columns) { - if (is_column_const(*column)) - throw Exception {"ColumnStruct cannot have ColumnConst as its element", - ErrorCodes::ILLEGAL_COLUMN}; + if (is_column_const(*column)) { + LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; + } } auto column_struct = ColumnStruct::create(columns); return column_struct; @@ -90,8 +87,7 @@ ColumnStruct::Ptr ColumnStruct::create(Columns& columns) { ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) { for (const auto& column : tuple_columns) { if (is_column_const(*column)) { - throw Exception {"ColumnStruct cannot have ColumnConst as its element", - ErrorCodes::ILLEGAL_COLUMN}; + LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element"; } } auto column_struct = ColumnStruct::create(tuple_columns); @@ -144,22 +140,11 @@ bool ColumnStruct::is_default_at(size_t n) const { return true; } -StringRef ColumnStruct::get_data_at(size_t) const { - throw Exception("Method get_data_at is not supported for " + get_name(), - ErrorCodes::NOT_IMPLEMENTED); -} - -void ColumnStruct::insert_data(const char*, size_t) { - throw Exception("Method insert_data is not supported for " + get_name(), - ErrorCodes::NOT_IMPLEMENTED); -} - void ColumnStruct::insert(const Field& x) { const auto& tuple = x.get<const Tuple&>(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) { - throw Exception("Cannot insert value of different size into tuple", - ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); + LOG(FATAL) << "Cannot insert value of different size into tuple."; } for (size_t i = 0; i < tuple_size; ++i) { @@ -172,8 +157,7 @@ void ColumnStruct::insert_from(const IColumn& src_, size_t n) { const size_t tuple_size = columns.size(); if (src.columns.size() != tuple_size) { - throw Exception("Cannot insert value of different size into tuple", - ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); + LOG(FATAL) << "Cannot insert value of different size into tuple."; } for (size_t i = 0; i < tuple_size; ++i) { diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 895a2796b7..75dade874d 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -102,7 +102,7 @@ public: std::string get_name() const override; const char* get_family_name() const override { return "Struct"; } TypeIndex get_data_type() const { return TypeIndex::Struct; } - + bool can_be_inside_nullable() const override { return true; } MutableColumnPtr clone_empty() const override; MutableColumnPtr clone_resized(size_t size) const override; @@ -112,8 +112,12 @@ public: void get(size_t n, Field& res) const override; bool is_default_at(size_t n) const override; - StringRef get_data_at(size_t n) const override; - void insert_data(const char* pos, size_t length) override; + [[noreturn]] StringRef get_data_at(size_t n) const override { + LOG(FATAL) << "Method get_data_at is not supported for " + get_name(); + } + [[noreturn]] void insert_data(const char* pos, size_t length) override { + LOG(FATAL) << "Method insert_data is not supported for " + get_name(); + } void insert(const Field& x) override; void insert_from(const IColumn& src_, size_t n) override; void insert_default() override; @@ -162,7 +166,10 @@ public: // int compare_at_with_collation(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint, // const Collator& collator) const override; - int compare_at(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const override; + [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_, + int nan_direction_hint) const override { + LOG(FATAL) << "compare_at not implemented"; + } void get_extremes(Field& min, Field& max) const override; // void get_permutation(IColumn::PermutationSortDirection direction, diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index e622d979d5..7eec1eb2fd 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -29,6 +29,18 @@ DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) { if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) { DCHECK(col_desc.get_sub_field_count() == 1); nested = std::make_shared<DataTypeArray>(create_data_type(*col_desc.get_sub_field(0))); + } else if (col_desc.type() == OLAP_FIELD_TYPE_STRUCT) { + DCHECK(col_desc.get_sub_field_count() >= 1); + size_t field_size = col_desc.get_sub_field_count(); + DataTypes dataTypes; + Strings names; + dataTypes.reserve(field_size); + names.reserve(field_size); + for (size_t i = 0; i < field_size; i++) { + dataTypes.push_back(create_data_type(*col_desc.get_sub_field(i))); + names.push_back(col_desc.name()); + } + nested = std::make_shared<DataTypeStruct>(dataTypes, names); } else { nested = _create_primitive_data_type(col_desc.type(), col_desc.get_precision(), col_desc.get_scale()); @@ -45,6 +57,18 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc, bool if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) { DCHECK(col_desc.get_subtype_count() == 1); nested = std::make_shared<DataTypeArray>(create_data_type(col_desc.get_sub_column(0))); + } else if (col_desc.type() == OLAP_FIELD_TYPE_STRUCT) { + DCHECK(col_desc.get_subtype_count() >= 1); + size_t col_size = col_desc.get_subtype_count(); + DataTypes dataTypes; + Strings names; + dataTypes.reserve(col_size); + names.reserve(col_size); + for (size_t i = 0; i < col_size; i++) { + dataTypes.push_back(create_data_type(col_desc.get_sub_column(i))); + names.push_back(col_desc.name()); + } + nested = std::make_shared<DataTypeStruct>(dataTypes, names); } else { nested = _create_primitive_data_type(col_desc.type(), col_desc.precision(), col_desc.frac()); @@ -129,8 +153,23 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo case TYPE_ARRAY: DCHECK(col_desc.children.size() == 1); nested = std::make_shared<vectorized::DataTypeArray>( - create_data_type(col_desc.children[0], col_desc.contains_null)); + create_data_type(col_desc.children[0], col_desc.contains_nulls[0])); + break; + case TYPE_STRUCT: { + DCHECK(col_desc.children.size() >= 1); + size_t child_size = col_desc.children.size(); + DCHECK_EQ(col_desc.field_names.size(), child_size); + DataTypes dataTypes; + Strings names; + dataTypes.reserve(child_size); + names.reserve(child_size); + for (size_t i = 0; i < child_size; i++) { + dataTypes.push_back(create_data_type(col_desc.children[i], col_desc.contains_nulls[i])); + names.push_back(col_desc.field_names[i]); + } + nested = std::make_shared<DataTypeStruct>(dataTypes, names); break; + } case INVALID_TYPE: default: DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type; diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index ed270b40ea..d46dd3435d 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -41,6 +41,7 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_struct.h" namespace doris::vectorized { @@ -95,7 +96,10 @@ public: }); return instance; } + + // TODO(xy): support creator to create dynamic struct type DataTypePtr get(const std::string& name) { return _data_type_map[name]; } + // TODO(xy): support creator to create dynamic struct type const std::string& get(const DataTypePtr& data_type) const { auto type_ptr = data_type->is_nullable() ? ((DataTypeNullable*)(data_type.get()))->get_nested_type() diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp index 91aff67a40..8fd2179bfa 100644 --- a/be/src/vec/data_types/data_type_struct.cpp +++ b/be/src/vec/data_types/data_type_struct.cpp @@ -43,16 +43,15 @@ DataTypeStruct::DataTypeStruct(const DataTypes& elems_) } } -static std::optional<Exception> check_tuple_names(const Strings& names) { +static Status check_tuple_names(const Strings& names) { std::unordered_set<String> names_set; for (const auto& name : names) { if (name.empty()) { - return Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS); + return Status::InvalidArgument("Names of tuple elements cannot be empty"); } if (!names_set.insert(name).second) { - return Exception("Names of tuple elements must be unique", - ErrorCodes::DUPLICATE_COLUMN); + return Status::InvalidArgument("Names of tuple elements must be unique"); } } @@ -63,13 +62,12 @@ DataTypeStruct::DataTypeStruct(const DataTypes& elems_, const Strings& names_) : elems(elems_), names(names_), have_explicit_names(true) { size_t size = elems.size(); if (names.size() != size) { - throw Exception("Wrong number of names passed to constructor of DataTypeStruct", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + LOG(FATAL) << "Wrong number of names passed to constructor of DataTypeStruct"; } - if (auto exception = check_tuple_names(names)) { - throw std::move(*exception); - } + Status st = check_tuple_names(names); + //if (!st.ok()) { + //} } std::string DataTypeStruct::do_get_name() const { @@ -114,8 +112,7 @@ static void add_element_safe(const DataTypes& elems, IColumn& column, F&& impl) if (element_column.size() != new_size) { // This is not a logical error because it may work with // user-supplied data. - throw Exception("Cannot read a tuple because not all elements are present", - ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH); + LOG(FATAL) << "Cannot read a tuple because not all elements are present"; } } } catch (...) { @@ -168,10 +165,11 @@ MutableColumnPtr DataTypeStruct::create_column() const { // return ColumnStruct::create(std::move(tuple_columns)); // } -// Field DataTypeStruct::get_default() const { -// return Tuple(collections::map<Tuple>( -// elems, [](const DataTypePtr& elem) { return elem->get_default(); })); -// } +Field DataTypeStruct::get_default() const { + return Tuple(); + //return Tuple(collections::map<Tuple>( + // elems, [](const DataTypePtr& elem) { return elem->get_default(); })); +} void DataTypeStruct::insert_default_into(IColumn& column) const { add_element_safe(elems, column, [&] { @@ -210,8 +208,7 @@ size_t DataTypeStruct::get_position_by_name(const String& name) const { return i; } } - throw Exception("Struct doesn't have element with name '" + name + "'", - ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + LOG(FATAL) << "Struct doesn't have element with name '" + name + "'"; } std::optional<size_t> DataTypeStruct::try_get_position_by_name(const String& name) const { @@ -229,7 +226,7 @@ String DataTypeStruct::get_name_by_position(size_t i) const { fmt::memory_buffer error_msg; fmt::format_to(error_msg, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); - throw Exception(fmt::to_string(error_msg), ErrorCodes::ILLEGAL_INDEX); + LOG(FATAL) << fmt::to_string(error_msg); } return names[i - 1]; diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h index 4201583ef4..9405544b76 100644 --- a/be/src/vec/data_types/data_type_struct.h +++ b/be/src/vec/data_types/data_type_struct.h @@ -62,7 +62,7 @@ public: std::string do_get_name() const override; const char* get_family_name() const override { return "Struct"; } - bool can_be_inside_nullable() const override { return false; } + bool can_be_inside_nullable() const override { return true; } bool supports_sparse_serialization() const { return true; } MutableColumnPtr create_column() const override; @@ -90,16 +90,18 @@ public: std::optional<size_t> try_get_position_by_name(const String& name) const; String get_name_by_position(size_t i) const; - int64_t get_uncompressed_serialized_bytes(const IColumn& column, - int be_exec_version) const override { + [[noreturn]] int64_t get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const override { LOG(FATAL) << "get_uncompressed_serialized_bytes not implemented"; } - char* serialize(const IColumn& column, char* buf, int be_exec_version) const override { + [[noreturn]] char* serialize(const IColumn& column, char* buf, + int be_exec_version) const override { LOG(FATAL) << "serialize not implemented"; } - const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override { + [[noreturn]] const char* deserialize(const char* buf, IColumn* column, + int be_exec_version) const override { LOG(FATAL) << "serialize not implemented"; } diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 68f3b05cc1..6f90ede85b 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -38,6 +38,7 @@ doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDes // create a const string column _target_data_type = _data_type; + // TODO(xy): support return struct type name _target_data_type_name = DataTypeFactory::instance().get(_target_data_type); _cast_param_data_type = std::make_shared<DataTypeString>(); _cast_param = _cast_param_data_type->create_column_const(1, _target_data_type_name); diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 9ad3ccc7e8..923aee07d6 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -35,6 +35,7 @@ #include "vec/exprs/vliteral.h" #include "vec/exprs/vruntimefilter_wrapper.h" #include "vec/exprs/vslot_ref.h" +#include "vec/exprs/vstruct_literal.h" #include "vec/exprs/vtuple_is_null_predicate.h" namespace doris::vectorized { @@ -124,6 +125,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr *expr = pool->add(new VArrayLiteral(texpr_node)); return Status::OK(); } + case TExprNodeType::STRUCT_LITERAL: { + *expr = pool->add(new VStructLiteral(texpr_node)); + return Status::OK(); + } case doris::TExprNodeType::SLOT_REF: { *expr = pool->add(new VSlotRef(texpr_node)); break; diff --git a/be/src/vec/exprs/vstruct_literal.cpp b/be/src/vec/exprs/vstruct_literal.cpp new file mode 100644 index 0000000000..9ae647b481 --- /dev/null +++ b/be/src/vec/exprs/vstruct_literal.cpp @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/vstruct_literal.h" + +namespace doris::vectorized { + +Status VStructLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) { + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); + Field struct_field = Tuple(); + for (const auto child : _children) { + Field item; + ColumnPtrWrapper* const_col_wrapper = nullptr; + RETURN_IF_ERROR(child->get_const_col(context, &const_col_wrapper)); + const_col_wrapper->column_ptr->get(0, item); + struct_field.get<Tuple>().push_back(item); + } + _column_ptr = _data_type->create_column_const(1, struct_field); + return Status::OK(); +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vstruct_literal.h b/be/src/vec/exprs/vstruct_literal.h new file mode 100644 index 0000000000..4cc4fe78d6 --- /dev/null +++ b/be/src/vec/exprs/vstruct_literal.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/exprs/vliteral.h" + +namespace doris { + +namespace vectorized { +class VStructLiteral : public VLiteral { +public: + VStructLiteral(const TExprNode& node) : VLiteral(node, false) {} + ~VStructLiteral() override = default; + Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) override; +}; +} // namespace vectorized + +} // namespace doris diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index e3baaecdd2..d300c09a76 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1754,6 +1754,7 @@ protected: LOG(FATAL) << fmt::format( "Second argument to {} must be a constant string describing type", get_name()); } + // TODO(xy): support return struct type for factory auto type = DataTypeFactory::instance().get(type_col->get_value<String>()); DCHECK(type != nullptr); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index e9b4c8fd24..0b3ad4ae03 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -20,8 +20,10 @@ #include "olap/tablet_schema.h" #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" +#include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_struct.h" namespace doris::vectorized { @@ -114,6 +116,14 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co case FieldType::OLAP_FIELD_TYPE_DOUBLE: { return std::make_unique<OlapColumnDataConvertorSimple<vectorized::Float64>>(); } + case FieldType::OLAP_FIELD_TYPE_STRUCT: { + std::vector<OlapColumnDataConvertorBaseUPtr> sub_convertors; + for (uint32_t i = 0; i < column.get_subtype_count(); i++) { + const TabletColumn& sub_column = column.get_sub_column(i); + sub_convertors.emplace_back(create_olap_column_data_convertor(sub_column)); + } + return std::make_unique<OlapColumnDataConvertorStruct>(sub_convertors); + } case FieldType::OLAP_FIELD_TYPE_ARRAY: { const auto& sub_column = column.get_sub_column(0); return std::make_unique<OlapColumnDataConvertorArray>( @@ -639,6 +649,58 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorDecimal::convert_to_olap() return Status::OK(); } +void OlapBlockDataConvertor::OlapColumnDataConvertorStruct::set_source_column( + const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t num_rows) { + OlapBlockDataConvertor::OlapColumnDataConvertorBase::set_source_column(typed_column, row_pos, + num_rows); +} + +const void* OlapBlockDataConvertor::OlapColumnDataConvertorStruct::get_data() const { + return _results[0]; +} + +const void* OlapBlockDataConvertor::OlapColumnDataConvertorStruct::get_data_at( + size_t offset) const { + // Todo(xy): struct not supported + return nullptr; +} + +Status OlapBlockDataConvertor::OlapColumnDataConvertorStruct::convert_to_olap() { + assert(_typed_column.column); + const vectorized::ColumnStruct* column_struct = nullptr; + const vectorized::DataTypeStruct* data_type_struct = nullptr; + if (_nullmap) { + auto nullable_column = + assert_cast<const vectorized::ColumnNullable*>(_typed_column.column.get()); + column_struct = assert_cast<const vectorized::ColumnStruct*>( + nullable_column->get_nested_column_ptr().get()); + data_type_struct = assert_cast<const DataTypeStruct*>( + (assert_cast<const DataTypeNullable*>(_typed_column.type.get())->get_nested_type()) + .get()); + } else { + column_struct = assert_cast<const vectorized::ColumnStruct*>(_typed_column.column.get()); + data_type_struct = assert_cast<const DataTypeStruct*>(_typed_column.type.get()); + } + assert(column_struct); + assert(data_type_struct); + + size_t data_size = column_struct->tuple_size(); + size_t data_cursor = 0; + size_t null_map_cursor = data_cursor + data_size; + for (size_t i = 0; i < data_size; i++) { + ColumnPtr sub_column = column_struct->get_column_ptr(i); + DataTypePtr sub_type = data_type_struct->get_element(i); + ColumnWithTypeAndName sub_typed_column = {sub_column, sub_type, ""}; + _sub_convertors[i]->set_source_column(sub_typed_column, _row_pos, _num_rows); + _sub_convertors[i]->convert_to_olap(); + _results[data_cursor] = _sub_convertors[i]->get_data(); + _results[null_map_cursor] = _sub_convertors[i]->get_nullmap(); + data_cursor++; + null_map_cursor++; + } + return Status::OK(); +} + Status OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap() { const ColumnArray* column_array = nullptr; const DataTypeArray* data_type_array = nullptr; diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 6898b44a9d..aafbe8d3e3 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -359,6 +359,27 @@ private: } }; + class OlapColumnDataConvertorStruct : public OlapColumnDataConvertorBase { + public: + OlapColumnDataConvertorStruct( + std::vector<OlapColumnDataConvertorBaseUPtr>& sub_convertors) { + for (auto& sub_convertor : sub_convertors) { + _sub_convertors.push_back(std::move(sub_convertor)); + } + size_t allocate_size = _sub_convertors.size() * 2; + _results.resize(allocate_size); + } + void set_source_column(const ColumnWithTypeAndName& typed_column, size_t row_pos, + size_t num_rows) override; + const void* get_data() const override; + const void* get_data_at(size_t offset) const override; + Status convert_to_olap() override; + + private: + std::vector<OlapColumnDataConvertorBaseUPtr> _sub_convertors; + std::vector<const void*> _results; + }; + class OlapColumnDataConvertorArray : public OlapColumnDataConvertorPaddedPODArray<CollectionValue> { public: diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 4c7f5028d7..2accb4dd03 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -29,6 +29,7 @@ #include "vec/common/assert_cast.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_struct.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" #include "vec/runtime/vdatetime_value.h" @@ -61,8 +62,8 @@ void VMysqlResultWriter::_init_profile() { template <PrimitiveType type, bool is_nullable> Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, - std::unique_ptr<TFetchDataResult>& result, - const DataTypePtr& nested_type_ptr, int scale) { + std::unique_ptr<TFetchDataResult>& result, int scale, + const DataTypes& sub_types) { SCOPED_TIMER(_convert_tuple_timer); const auto row_size = column_ptr->size(); @@ -145,6 +146,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } } else if constexpr (type == TYPE_ARRAY) { + DCHECK_EQ(sub_types.size(), 1); auto& column_array = assert_cast<const ColumnArray&>(*column); auto& offsets = column_array.get_offsets(); for (ssize_t i = 0; i < row_size; ++i) { @@ -172,12 +174,12 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, if (data->is_null_at(j)) { buf_ret = _buffer.push_string("NULL", strlen("NULL")); } else { - if (WhichDataType(remove_nullable(nested_type_ptr)).is_string()) { + if (WhichDataType(remove_nullable(sub_types[0])).is_string()) { buf_ret = _buffer.push_string("'", 1); - buf_ret = _add_one_cell(data, j, nested_type_ptr, _buffer); + buf_ret = _add_one_cell(data, j, sub_types[0], _buffer); buf_ret = _buffer.push_string("'", 1); } else { - buf_ret = _add_one_cell(data, j, nested_type_ptr, _buffer); + buf_ret = _add_one_cell(data, j, sub_types[0], _buffer); } } begin = false; @@ -186,8 +188,51 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, _buffer.close_dynamic_mode(); result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } + } else if constexpr (type == TYPE_STRUCT) { + DCHECK_GE(sub_types.size(), 1); + auto& column_struct = assert_cast<const ColumnStruct&>(*column); + for (ssize_t i = 0; i < row_size; ++i) { + if (0 != buf_ret) { + return Status::InternalError("pack mysql buffer failed."); + } + _buffer.reset(); + + if constexpr (is_nullable) { + if (column_ptr->is_null_at(i)) { + buf_ret = _buffer.push_null(); + result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + continue; + } + } + + _buffer.open_dynamic_mode(); + buf_ret = _buffer.push_string("{", 1); + bool begin = true; + for (size_t j = 0; j < sub_types.size(); ++j) { + if (!begin) { + buf_ret = _buffer.push_string(", ", 2); + } + const auto& data = column_struct.get_column_ptr(j); + if (data->is_null_at(j)) { + buf_ret = _buffer.push_string("NULL", strlen("NULL")); + } else { + if (WhichDataType(remove_nullable(sub_types[j])).is_string()) { + buf_ret = _buffer.push_string("'", 1); + buf_ret = _add_one_cell(data, j, sub_types[j], _buffer); + buf_ret = _buffer.push_string("'", 1); + } else { + buf_ret = _add_one_cell(data, j, sub_types[j], _buffer); + } + } + begin = false; + } + buf_ret = _buffer.push_string("}", 1); + _buffer.close_dynamic_mode(); + result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + } } else if constexpr (type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || type == TYPE_DECIMAL128I) { + DCHECK_EQ(sub_types.size(), 1); for (int i = 0; i < row_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); @@ -201,7 +246,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, continue; } } - std::string decimal_str = nested_type_ptr->to_string(*column, i); + std::string decimal_str = sub_types[0]->to_string(*column, i); buf_ret = _buffer.push_string(decimal_str.c_str(), decimal_str.length()); result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } @@ -397,6 +442,7 @@ int VMysqlResultWriter::_add_one_cell(const ColumnPtr& column_ptr, size_t row_id auto decimal_str = assert_cast<const DataTypeDecimal<Decimal128I>*>(nested_type.get()) ->to_string(*column, row_idx); return buffer.push_string(decimal_str.c_str(), decimal_str.length()); + // TODO(xy): support nested struct } else if (which.is_array()) { auto& column_array = assert_cast<const ColumnArray&>(*column); auto& offsets = column_array.get_offsets(); @@ -554,10 +600,10 @@ Status VMysqlResultWriter::append_block(Block& input_block) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); status = _add_one_column<PrimitiveType::TYPE_DECIMALV2, true>(column_ptr, result, - nested_type, scale); + scale, {nested_type}); } else { status = _add_one_column<PrimitiveType::TYPE_DECIMALV2, false>(column_ptr, result, - type_ptr, scale); + scale, {type_ptr}); } break; } @@ -566,10 +612,10 @@ Status VMysqlResultWriter::append_block(Block& input_block) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); status = _add_one_column<PrimitiveType::TYPE_DECIMAL32, true>(column_ptr, result, - nested_type, scale); + scale, {nested_type}); } else { status = _add_one_column<PrimitiveType::TYPE_DECIMAL32, false>(column_ptr, result, - type_ptr, scale); + scale, {type_ptr}); } break; } @@ -578,10 +624,10 @@ Status VMysqlResultWriter::append_block(Block& input_block) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); status = _add_one_column<PrimitiveType::TYPE_DECIMAL64, true>(column_ptr, result, - nested_type, scale); + scale, {nested_type}); } else { status = _add_one_column<PrimitiveType::TYPE_DECIMAL64, false>(column_ptr, result, - type_ptr, scale); + scale, {type_ptr}); } break; } @@ -589,11 +635,11 @@ Status VMysqlResultWriter::append_block(Block& input_block) { if (type_ptr->is_nullable()) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); - status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I, true>(column_ptr, result, - nested_type, scale); + status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I, true>( + column_ptr, result, scale, {nested_type}); } else { status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I, false>(column_ptr, result, - type_ptr, scale); + scale, {type_ptr}); } break; } @@ -625,10 +671,10 @@ Status VMysqlResultWriter::append_block(Block& input_block) { case TYPE_DATETIMEV2: { if (type_ptr->is_nullable()) { status = _add_one_column<PrimitiveType::TYPE_DATETIMEV2, true>(column_ptr, result, - nullptr, scale); + scale); } else { status = _add_one_column<PrimitiveType::TYPE_DATETIMEV2, false>(column_ptr, result, - nullptr, scale); + scale); } break; } @@ -646,12 +692,26 @@ Status VMysqlResultWriter::append_block(Block& input_block) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); auto& sub_type = assert_cast<const DataTypeArray&>(*nested_type).get_nested_type(); - status = _add_one_column<PrimitiveType::TYPE_ARRAY, true>(column_ptr, result, - sub_type); + status = _add_one_column<PrimitiveType::TYPE_ARRAY, true>(column_ptr, result, scale, + {sub_type}); } else { auto& sub_type = assert_cast<const DataTypeArray&>(*type_ptr).get_nested_type(); status = _add_one_column<PrimitiveType::TYPE_ARRAY, false>(column_ptr, result, - sub_type); + scale, {sub_type}); + } + break; + } + case TYPE_STRUCT: { + if (type_ptr->is_nullable()) { + auto& nested_type = + assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); + auto& sub_types = assert_cast<const DataTypeStruct&>(*nested_type).get_elements(); + status = _add_one_column<PrimitiveType::TYPE_STRUCT, true>(column_ptr, result, + scale, sub_types); + } else { + auto& sub_types = assert_cast<const DataTypeStruct&>(*type_ptr).get_elements(); + status = _add_one_column<PrimitiveType::TYPE_STRUCT, false>(column_ptr, result, + scale, sub_types); } break; } diff --git a/be/src/vec/sink/vmysql_result_writer.h b/be/src/vec/sink/vmysql_result_writer.h index 3f79f0e2d6..db61dcbf41 100644 --- a/be/src/vec/sink/vmysql_result_writer.h +++ b/be/src/vec/sink/vmysql_result_writer.h @@ -49,7 +49,7 @@ private: template <PrimitiveType type, bool is_nullable> Status _add_one_column(const ColumnPtr& column_ptr, std::unique_ptr<TFetchDataResult>& result, - const DataTypePtr& nested_type_ptr = nullptr, int scale = -1); + int scale = -1, const DataTypes& sub_types = DataTypes()); int _add_one_cell(const ColumnPtr& column_ptr, size_t row_idx, const DataTypePtr& type, MysqlRowBuffer& buffer); diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 32bb070d22..2f4cc66cc9 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -1403,11 +1403,12 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto } fmt::format_to(error_prefix, "ARRAY type failed: "); RETURN_IF_ERROR(_validate_column( - state, nested_type, nested_type.contains_null, column_array->get_data_ptr(), + state, nested_type, nested_type.contains_nulls[0], column_array->get_data_ptr(), slot_index, filter_bitmap, stop_processing, error_prefix, &permutation)); } break; } + // TODO(xy): add struct type validate default: break; } diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index c81e8b6423..35be33e918 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -618,7 +618,7 @@ terminal String KW_TYPECAST, KW_HISTOGRAM; -terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; +terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, LBRACE, RBRACE, DIVIDE, MOD, ADD, SUBTRACT; terminal BITAND, BITOR, BITXOR, BITNOT; terminal EQUAL, NOT, LESSTHAN, GREATERTHAN, SET_VAR; terminal COMMENTED_PLAN_HINT_START, COMMENTED_PLAN_HINT_END; @@ -729,6 +729,7 @@ nonterminal Expr function_call_expr, array_expr; nonterminal ArrayLiteral array_literal; nonterminal StructField struct_field; nonterminal ArrayList<StructField> struct_field_list; +nonterminal StructLiteral struct_literal; nonterminal AnalyticWindow opt_window_clause; nonterminal AnalyticWindow.Type window_type; nonterminal AnalyticWindow.Boundary window_boundary; @@ -928,6 +929,7 @@ precedence left KW_PARTITION; precedence left KW_PARTITIONS; precedence right KW_TEMPORARY; precedence right LBRACKET; +precedence right LBRACE; precedence left KW_ENGINE; // unused @@ -5835,16 +5837,23 @@ struct_field ::= ; struct_field_list ::= - struct_field:field - {: - RESULT = Lists.newArrayList(field); - :} - | struct_field_list:fields COMMA struct_field:field - {: - fields.add(field); - RESULT = fields; - :} - ; + struct_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | struct_field_list:fields COMMA struct_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + +struct_literal ::= + LBRACE expr_list:list RBRACE + {: + RESULT = new StructLiteral(list.toArray(new LiteralExpr[0])); + :} + ; exists_predicate ::= KW_EXISTS subquery:s @@ -5868,6 +5877,8 @@ non_pred_expr ::= {: RESULT = a; :} | array_literal:a {: RESULT = a; :} + | struct_literal:s + {: RESULT = s; :} | function_call_expr:e {: RESULT = e; :} | KW_DATE STRING_LITERAL:l diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index 66670db2f2..3f57f92af8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -292,6 +292,10 @@ public class ColumnDef { } if (type.getPrimitiveType() == PrimitiveType.STRUCT) { + if (isKey()) { + throw new AnalysisException("Struct can only be used in the non-key column of" + + " the duplicate table at present."); + } if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { throw new AnalysisException("Struct type column default value just support null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 37d73d7bb0..72fbfcadd0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -336,7 +336,7 @@ public class CreateTableStmt extends DdlStmt { // So the float and double could not be the first column in OLAP table. if (keysColumnNames.isEmpty()) { throw new AnalysisException("The olap table first column could not be float, double, string" - + " or array, please use decimal or varchar instead."); + + " or array, struct, map, please use decimal or varchar instead."); } keysDesc = new KeysDesc(KeysType.DUP_KEYS, keysColumnNames); } @@ -390,14 +390,14 @@ public class CreateTableStmt extends DdlStmt { for (ColumnDef columnDef : columnDefs) { columnDef.analyze(engineName.equals("olap")); - if (columnDef.getType().isArrayType() && engineName.equals("olap")) { + if (columnDef.getType().isComplexType()) { if (columnDef.getAggregateType() != null && columnDef.getAggregateType() != AggregateType.NONE) { - throw new AnalysisException("Array column can't support aggregation " - + columnDef.getAggregateType()); + throw new AnalysisException(columnDef.getType().getPrimitiveType() + + " column can't support aggregation " + columnDef.getAggregateType()); } if (columnDef.isKey()) { - throw new AnalysisException("Array can only be used in the non-key column of" - + " the duplicate table at present."); + throw new AnalysisException(columnDef.getType().getPrimitiveType() + + " can only be used in the non-key column of the duplicate table at present."); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 35bd80e9a3..31b2382f0a 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1790,7 +1790,8 @@ public abstract class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl ARRAY_LITERAL(13), CAST_EXPR(14), JSON_LITERAL(15), - ARITHMETIC_EXPR(16); + ARITHMETIC_EXPR(16), + STRUCT_LITERAL(17); private static Map<Integer, ExprSerCode> codeMap = Maps.newHashMap(); @@ -1842,6 +1843,8 @@ public abstract class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl output.writeInt(ExprSerCode.FUNCTION_CALL.getCode()); } else if (expr instanceof ArrayLiteral) { output.writeInt(ExprSerCode.ARRAY_LITERAL.getCode()); + } else if (expr instanceof StructLiteral) { + output.writeInt(ExprSerCode.STRUCT_LITERAL.getCode()); } else if (expr instanceof CastExpr) { output.writeInt(ExprSerCode.CAST_EXPR.getCode()); } else if (expr instanceof ArithmeticExpr) { @@ -1891,6 +1894,8 @@ public abstract class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl return FunctionCallExpr.read(in); case ARRAY_LITERAL: return ArrayLiteral.read(in); + case STRUCT_LITERAL: + return StructLiteral.read(in); case CAST_EXPR: return CastExpr.read(in); case ARITHMETIC_EXPR: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java new file mode 100644 index 0000000000..a9209f1c5a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; + +import org.apache.commons.lang.StringUtils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class StructLiteral extends LiteralExpr { + // only for persist + public StructLiteral() { + type = new StructType(); + children = new ArrayList<>(); + } + + public StructLiteral(LiteralExpr... exprs) throws AnalysisException { + type = new StructType(); + children = new ArrayList<>(); + for (LiteralExpr expr : exprs) { + if (!type.supportSubType(expr.getType())) { + throw new AnalysisException("Invalid element type in STRUCT."); + } + ((StructType) type).addField(new StructField(expr.getType())); + children.add(expr); + } + } + + protected StructLiteral(StructLiteral other) { + super(other); + } + + @Override + protected String toSqlImpl() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toDigestImpl())); + return "STRUCT(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String toDigestImpl() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toDigestImpl())); + return "STRUCT(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String getStringValue() { + List<String> list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.getStringValue())); + return "{" + StringUtils.join(list, ", ") + "}"; + } + + @Override + public String getStringValueForArray() { + return null; + } + + @Override + protected void toThrift(TExprNode msg) { + msg.node_type = TExprNodeType.STRUCT_LITERAL; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeInt(children.size()); + for (Expr e : children) { + Expr.writeTo(e, out); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + children = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + children.add(Expr.readIn(in)); + } + } + + public static StructLiteral read(DataInput in) throws IOException { + StructLiteral literal = new StructLiteral(); + literal.readFields(in); + return literal; + } + + @Override + public Expr clone() { + return new StructLiteral(this); + } + + @Override + public boolean isMinValue() { + return false; + } + + @Override + public int compareLiteral(LiteralExpr expr) { + return 0; + } + + @Override + public Expr uncheckedCastTo(Type targetType) throws AnalysisException { + if (!targetType.isStructType()) { + return super.uncheckedCastTo(targetType); + } + ArrayList<StructField> fields = ((StructType) targetType).getFields(); + StructLiteral literal = new StructLiteral(this); + for (int i = 0; i < children.size(); ++ i) { + Expr child = children.get(i); + literal.children.set(i, child.uncheckedCastTo((fields.get(i).getType()))); + } + literal.setType(targetType); + return literal; + } + + @Override + public void checkValueValid() throws AnalysisException { + for (Expr e : children) { + e.checkValueValid(); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java index 4246edb5e4..c318e067a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java @@ -172,7 +172,7 @@ public class Subquery extends Expr { fieldName = "_" + Integer.toString(i); } Preconditions.checkNotNull(fieldName); - structFields.add(new StructField(fieldName, expr.getType(), null)); + structFields.add(new StructField(fieldName, expr.getType())); } Preconditions.checkState(structFields.size() != 0); return new StructType(structFields); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java index df00ef62b5..5690c115c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -30,6 +30,8 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; /** * Represents an anonymous type definition, e.g., used in DDL and CASTs. @@ -94,45 +96,46 @@ public class TypeDef implements ParseNode { if (type.isArrayType()) { Type itemType = ((ArrayType) type).getItemType(); if (itemType instanceof ScalarType) { - analyzeNestedType((ScalarType) itemType); + analyzeNestedType(type, (ScalarType) itemType); } } if (type.isMapType()) { ScalarType keyType = (ScalarType) ((MapType) type).getKeyType(); ScalarType valueType = (ScalarType) ((MapType) type).getKeyType(); - analyzeNestedType(keyType); - analyzeNestedType(valueType); + analyzeNestedType(type, keyType); + analyzeNestedType(type, valueType); } if (type.isStructType()) { ArrayList<StructField> fields = ((StructType) type).getFields(); - for (int i = 0; i < fields.size(); i++) { - ScalarType filedType = (ScalarType) fields.get(i).getType(); - analyzeNestedType(filedType); + Set<String> fieldNames = new HashSet<>(); + for (StructField field : fields) { + Type fieldType = field.getType(); + if (fieldType instanceof ScalarType) { + analyzeNestedType(type, (ScalarType) fieldType); + if (!fieldNames.add(field.getName())) { + throw new AnalysisException("Duplicate field name " + + field.getName() + " in struct " + type.toSql()); + } + } } } } } - private void analyzeNestedType(ScalarType type) throws AnalysisException { - if (type.isNull()) { - throw new AnalysisException("Unsupported data type: " + type.toSql()); - } - // check whether the array sub-type is supported - Boolean isSupportType = false; - for (Type subType : Type.getArraySubTypes()) { - if (type.getPrimitiveType() == subType.getPrimitiveType()) { - isSupportType = true; - break; - } + private void analyzeNestedType(Type parent, ScalarType child) throws AnalysisException { + if (child.isNull()) { + throw new AnalysisException("Unsupported data type: " + child.toSql()); } - if (!isSupportType) { - throw new AnalysisException("Array unsupported sub-type: " + type.toSql()); + // check whether the sub-type is supported + if (!parent.supportSubType(child)) { + throw new AnalysisException( + parent.getPrimitiveType() + "unsupported sub-type: " + child.toSql()); } - if (type.getPrimitiveType().isStringType() && !type.isLengthSet()) { - type.setLength(1); + if (child.getPrimitiveType().isStringType() && !child.isLengthSet()) { + child.setLength(1); } - analyze(type); + analyze(child); } private void analyzeScalarType(ScalarType scalarType) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java index 45d01019d1..8aa6acb31a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java @@ -159,6 +159,16 @@ public class ArrayType extends Type { return !itemType.isNull(); } + @Override + public boolean supportSubType(Type subType) { + for (Type supportedType : getArraySubTypes()) { + if (subType.getPrimitiveType() == supportedType.getPrimitiveType()) { + return true; + } + } + return false; + } + @Override public String toString() { return toSql(0).toUpperCase(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index ae96365332..9efe901907 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -111,7 +111,7 @@ public class Column implements Writable, GsonPostProcessable { this.stats = new ColumnStats(); this.visible = true; this.defineExpr = null; - this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); + this.children = new ArrayList<>(); this.uniqueId = -1; } @@ -160,7 +160,7 @@ public class Column implements Writable, GsonPostProcessable { this.comment = comment; this.stats = new ColumnStats(); this.visible = visible; - this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); + this.children = new ArrayList<>(); createChildrenColumn(this.type, this); this.uniqueId = colUniqueId; } @@ -187,6 +187,13 @@ public class Column implements Writable, GsonPostProcessable { Column c = new Column(COLUMN_ARRAY_CHILDREN, ((ArrayType) type).getItemType()); c.setIsAllowNull(((ArrayType) type).getContainsNull()); column.addChildrenColumn(c); + } else if (type.isStructType()) { + ArrayList<StructField> fields = ((StructType) type).getFields(); + for (StructField field : fields) { + Column c = new Column(field.getName(), field.getType()); + c.setIsAllowNull(field.getContainsNull()); + column.addChildrenColumn(c); + } } } @@ -401,34 +408,41 @@ public class Column implements Writable, GsonPostProcessable { return tColumn; } + private void setChildrenTColumn(Column children, TColumn tColumn) { + TColumn childrenTColumn = new TColumn(); + childrenTColumn.setColumnName(children.name); + + TColumnType childrenTColumnType = new TColumnType(); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setLen(children.getStrLen()); + childrenTColumnType.setPrecision(children.getPrecision()); + childrenTColumnType.setScale(children.getScale()); + childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); + + childrenTColumn.setColumnType(childrenTColumnType); + childrenTColumn.setIsAllowNull(children.isAllowNull()); + // TODO: If we don't set the aggregate type for children, the type will be + // considered as TAggregationType::SUM after deserializing in BE. + // For now, we make children inherit the aggregate type from their parent. + if (tColumn.getAggregationType() != null) { + childrenTColumn.setAggregationType(tColumn.getAggregationType()); + } + + tColumn.children_column.add(childrenTColumn); + toChildrenThrift(children, childrenTColumn); + } + private void toChildrenThrift(Column column, TColumn tColumn) { if (column.type.isArrayType()) { Column children = column.getChildren().get(0); - - TColumn childrenTColumn = new TColumn(); - childrenTColumn.setColumnName(children.name); - - TColumnType childrenTColumnType = new TColumnType(); - childrenTColumnType.setType(children.getDataType().toThrift()); - childrenTColumnType.setType(children.getDataType().toThrift()); - childrenTColumnType.setLen(children.getStrLen()); - childrenTColumnType.setPrecision(children.getPrecision()); - childrenTColumnType.setScale(children.getScale()); - - childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); - childrenTColumn.setColumnType(childrenTColumnType); - childrenTColumn.setIsAllowNull(children.isAllowNull()); - // TODO: If we don't set the aggregate type for children, the type will be - // considered as TAggregationType::SUM after deserializing in BE. - // For now, we make children inherit the aggregate type from their parent. - if (tColumn.getAggregationType() != null) { - childrenTColumn.setAggregationType(tColumn.getAggregationType()); - } - tColumn.setChildrenColumn(new ArrayList<>()); - tColumn.children_column.add(childrenTColumn); - - toChildrenThrift(children, childrenTColumn); + setChildrenTColumn(children, tColumn); + } else if (column.type.isStructType()) { + List<Column> childrenColumns = column.getChildren(); + tColumn.setChildrenColumn(new ArrayList<>()); + for (Column children : childrenColumns) { + setChildrenTColumn(children, tColumn); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java index 1a746a2374..adf74f5c6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java @@ -88,6 +88,11 @@ public class MapType extends Type { return String.format("%sMAP<%s,%s>", leftPadding, keyType.toSql(), structStr); } + @Override + public boolean supportSubType(Type subType) { + return true; + } + @Override public void toThrift(TTypeDesc container) { TTypeNode node = new TTypeNode(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index e91face521..9e279c8d2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -68,7 +68,10 @@ public enum PrimitiveType { // sizeof(CollectionValue) ARRAY("ARRAY", 32, TPrimitiveType.ARRAY), MAP("MAP", 24, TPrimitiveType.MAP), - STRUCT("STRUCT", 24, TPrimitiveType.STRUCT), + // sizeof(StructValue) + // 8-byte pointer and 4-byte size and 1 bytes has_null (13 bytes total) + // Aligning to 16 bytes total. + STRUCT("STRUCT", 16, TPrimitiveType.STRUCT), STRING("STRING", 16, TPrimitiveType.STRING), // Unsupported scalar types. BINARY("BINARY", -1, TPrimitiveType.BINARY), diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java index 6e87b1be40..a084bfe0e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java @@ -22,25 +22,43 @@ import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; import com.google.common.base.Strings; +import com.google.gson.annotations.SerializedName; /** * TODO: Support comments for struct fields. The Metastore does not properly store * comments of struct fields. We set comment to null to avoid compatibility issues. */ public class StructField { + @SerializedName(value = "name") protected final String name; + + @SerializedName(value = "type") protected final Type type; + + @SerializedName(value = "comment") protected final String comment; + + @SerializedName(value = "position") protected int position; // in struct - public StructField(String name, Type type, String comment) { - this.name = name; + @SerializedName(value = "containsNull") + private final boolean containsNull; // Now always true (nullable field) + + private static final String DEFAULT_FIELD_NAME = "col"; + + public StructField(String name, Type type, String comment, boolean containsNull) { + this.name = name.toLowerCase(); this.type = type; this.comment = comment; + this.containsNull = containsNull; } public StructField(String name, Type type) { - this(name, type, null); + this(name, type, null, true); + } + + public StructField(Type type) { + this(DEFAULT_FIELD_NAME, type, null, true); } public String getComment() { @@ -63,11 +81,20 @@ public class StructField { this.position = position; } + public boolean getContainsNull() { + return containsNull; + } + public String toSql(int depth) { - String typeSql = (depth < Type.MAX_NESTING_DEPTH) ? type.toSql(depth) : "..."; + String typeSql; + if (depth < Type.MAX_NESTING_DEPTH) { + typeSql = !containsNull ? "not_null(" + type.toSql(depth) + ")" : type.toSql(depth); + } else { + typeSql = "..."; + } StringBuilder sb = new StringBuilder(name); if (type != null) { - sb.append(":" + typeSql); + sb.append(":").append(typeSql); } if (comment != null) { sb.append(String.format(" COMMENT '%s'", comment)); @@ -87,7 +114,7 @@ public class StructField { // even if we then strip the top-level padding. String typeStr = type.prettyPrint(lpad); typeStr = typeStr.substring(lpad); - sb.append(":" + typeStr); + sb.append(":").append(typeStr); } if (comment != null) { sb.append(String.format(" COMMENT '%s'", comment)); @@ -95,12 +122,25 @@ public class StructField { return sb.toString(); } + public static boolean canCastTo(StructField field, StructField targetField) { + // TODO(xy): support cast field + return false; + } + + public boolean matchesField(StructField f) { + if (equals(f)) { + return true; + } + return type.matchesType(f.getType()) && containsNull == f.getContainsNull(); + } + public void toThrift(TTypeDesc container, TTypeNode node) { TStructField field = new TStructField(); field.setName(name); if (comment != null) { field.setComment(comment); } + field.setContainsNull(containsNull); node.struct_fields.add(field); type.toThrift(container); } @@ -111,6 +151,7 @@ public class StructField { return false; } StructField otherStructField = (StructField) other; - return otherStructField.name.equals(name) && otherStructField.type.equals(type); + return otherStructField.name.equals(name) && otherStructField.type.equals(type) + && otherStructField.containsNull == containsNull; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java index e3ffa3a521..881d451743 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java @@ -17,6 +17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.thrift.TColumnType; import org.apache.doris.thrift.TStructField; import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; @@ -27,6 +28,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.gson.annotations.SerializedName; import java.util.ArrayList; import java.util.HashMap; @@ -35,7 +37,14 @@ import java.util.HashMap; * Describes a STRUCT type. STRUCT types have a list of named struct fields. */ public class StructType extends Type { + + @SerializedName(value = "fieldMap") private final HashMap<String, StructField> fieldMap = Maps.newHashMap(); + + //@SerializedName(value = "positionToField") + //private final HashMap<Integer, StructField> positionToField = Maps.newHashMap(); + + @SerializedName(value = "fields") private final ArrayList<StructField> fields; public StructType(ArrayList<StructField> fields) { @@ -44,11 +53,12 @@ public class StructType extends Type { for (int i = 0; i < this.fields.size(); ++i) { this.fields.get(i).setPosition(i); fieldMap.put(this.fields.get(i).getName().toLowerCase(), this.fields.get(i)); + //positionToField.put(this.fields.get(i).getPosition(), this.fields.get(i)); } } public StructType() { - fields = Lists.newArrayList(); + this.fields = Lists.newArrayList(); } @Override @@ -74,10 +84,36 @@ public class StructType extends Type { leftPadding, Joiner.on(",\n").join(fieldsSql), leftPadding); } + public static boolean canCastTo(StructType type, StructType targetType) { + // TODO(xy) : support cast struct type + return false; + } + + @Override + public boolean isSupported() { + for (StructField f : fields) { + if (!f.getType().isSupported()) { + return false; + } + } + return true; + } + + @Override + public boolean supportSubType(Type subType) { + for (Type supportedType : Type.getStructSubTypes()) { + if (subType.getPrimitiveType() == supportedType.getPrimitiveType()) { + return true; + } + } + return false; + } + public void addField(StructField field) { field.setPosition(fields.size()); fields.add(field); fieldMap.put(field.getName().toLowerCase(), field); + //positionToField.put(field.getPosition(), field); } public ArrayList<StructField> getFields() { @@ -88,9 +124,41 @@ public class StructType extends Type { return fieldMap.get(fieldName.toLowerCase()); } + //public StructField getField(int position) { + // return positionToField.get(position); + //} + public void clearFields() { fields.clear(); fieldMap.clear(); + //positionToField.clear(); + } + + @Override + public PrimitiveType getPrimitiveType() { + return PrimitiveType.STRUCT; + } + + @Override + public boolean matchesType(Type t) { + if (equals(t)) { + return true; + } + + if (t.isStructType()) { + return false; + } + + if (fields.size() != ((StructType) t).getFields().size()) { + return false; + } + + for (int i = 0; i < fields.size(); i++) { + if (!fields.get(i).matchesField(((StructType) t).getFields().get(i))) { + return false; + } + } + return true; } @Override @@ -107,7 +175,7 @@ public class StructType extends Type { TTypeNode node = new TTypeNode(); container.types.add(node); Preconditions.checkNotNull(fields); - Preconditions.checkNotNull(!fields.isEmpty()); + Preconditions.checkState(!fields.isEmpty()); node.setType(TTypeNodeType.STRUCT); node.setStructFields(new ArrayList<TStructField>()); for (StructField field : fields) { @@ -119,4 +187,26 @@ public class StructType extends Type { public String toString() { return toSql(0); } + + @Override + public TColumnType toColumnTypeThrift() { + TColumnType thrift = new TColumnType(); + thrift.type = PrimitiveType.STRUCT.toThrift(); + return thrift; + } + + @Override + public boolean isFixedLengthType() { + return false; + } + + @Override + public boolean supportsTablePartitioning() { + return false; + } + + @Override + public int getSlotSize() { + return PrimitiveType.STRUCT.getSlotSize(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index d80aa4fc22..45d88897a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -112,6 +112,7 @@ public abstract class Type { private static final ArrayList<ScalarType> numericDateTimeTypes; private static final ArrayList<ScalarType> supportedTypes; private static final ArrayList<Type> arraySubTypes; + private static final ArrayList<Type> structSubTypes; private static final ArrayList<ScalarType> trivialTypes; static { @@ -173,6 +174,10 @@ public abstract class Type { arraySubTypes.add(CHAR); arraySubTypes.add(VARCHAR); arraySubTypes.add(STRING); + + structSubTypes = Lists.newArrayList(); + structSubTypes.add(INT); + structSubTypes.add(STRING); } public static ArrayList<ScalarType> getIntegerTypes() { @@ -199,6 +204,17 @@ public abstract class Type { return arraySubTypes; } + public static ArrayList<Type> getStructSubTypes() { + return structSubTypes; + } + + /** + * Return true if this is complex type and support subType + */ + public boolean supportSubType(Type subType) { + return false; + } + /** * The output of this is stored directly in the hive metastore as the column type. * The string must match exactly. @@ -381,7 +397,7 @@ public abstract class Type { } public boolean isCollectionType() { - return isMapType() || isArrayType() || isMultiRowType(); + return isMapType() || isArrayType() || isMultiRowType() || isStructType(); } public boolean isMapType() { @@ -506,6 +522,10 @@ public abstract class Type { && !sourceType.isNull()) { // TODO: current not support cast any non-array type(except for null) to nested array type. return false; + } else if (targetType.isStructType() && sourceType.isStringType()) { + return true; + } else if (sourceType.isStructType() && targetType.isStructType()) { + return StructType.canCastTo((StructType) sourceType, (StructType) targetType); } return sourceType.isNull() || sourceType.getPrimitiveType().isCharFamily(); } @@ -828,7 +848,7 @@ public abstract class Type { } Pair<Type, Integer> res = fromThrift(col, tmpNodeIdx); tmpNodeIdx = res.second.intValue(); - structFields.add(new StructField(name, res.first, comment)); + structFields.add(new StructField(name, res.first, comment, true)); } type = new StructType(structFields); break; diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index b66a91004a..c56743bd22 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -499,6 +499,8 @@ import org.apache.doris.qe.SqlModeHelper; tokenIdMap.put(new Integer(SqlParserSymbols.RPAREN), ")"); tokenIdMap.put(new Integer(SqlParserSymbols.LBRACKET), "["); tokenIdMap.put(new Integer(SqlParserSymbols.RBRACKET), "]"); + tokenIdMap.put(new Integer(SqlParserSymbols.LBRACE), "{"); + tokenIdMap.put(new Integer(SqlParserSymbols.RBRACE), "}"); tokenIdMap.put(new Integer(SqlParserSymbols.COLON), ":"); tokenIdMap.put(new Integer(SqlParserSymbols.SEMICOLON), ";"); tokenIdMap.put(new Integer(SqlParserSymbols.FLOATINGPOINT_LITERAL), @@ -647,6 +649,8 @@ EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminato ";" { return newToken(SqlParserSymbols.SEMICOLON, null); } "[" { return newToken(SqlParserSymbols.LBRACKET, null); } "]" { return newToken(SqlParserSymbols.RBRACKET, null); } +"{" { return newToken(SqlParserSymbols.LBRACE, null); } +"}" { return newToken(SqlParserSymbols.RBRACE, null); } "/" { return newToken(SqlParserSymbols.DIVIDE, null); } "%" { return newToken(SqlParserSymbols.MOD, null); } "+" { return newToken(SqlParserSymbols.ADD, null); } diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto index 5686a6ac9d..dea020d0b4 100644 --- a/gensrc/proto/types.proto +++ b/gensrc/proto/types.proto @@ -39,6 +39,7 @@ message PScalarType { message PStructField { required string name = 1; optional string comment = 2; + optional bool contains_null = 3; }; message PTypeNode { diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index 230deb51ff..d25dce63a1 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -44,6 +44,7 @@ enum TExprNodeType { INFO_FUNC, FUNCTION_CALL, ARRAY_LITERAL, + STRUCT_LITERAL, // TODO: old style compute functions. this will be deprecated COMPUTE_FUNCTION_CALL, diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index 589b95f77c..466d0d3b18 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -125,6 +125,7 @@ struct TScalarType { struct TStructField { 1: required string name 2: optional string comment + 3: optional bool contains_null } struct TTypeNode { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org