This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 991fdff9ad0 [Improve](TabletSchemaCache) reduce duplicated memory consumption for column name and column path (#31141) 991fdff9ad0 is described below commit 991fdff9ad0c990ba2f74962174c5fbfe8cc138f Author: lihangyu <15605149...@163.com> AuthorDate: Thu Mar 7 20:31:59 2024 +0800 [Improve](TabletSchemaCache) reduce duplicated memory consumption for column name and column path (#31141) Both can now be references to the corresponding fields in TabletColumn. Also use shared_ptr for TabletColumn in TabletSchema to allow later memory reuse. --- be/src/olap/delta_writer.cpp | 2 +- be/src/olap/field.h | 7 +- be/src/olap/in_list_predicate.h | 7 - be/src/olap/row_cursor.cpp | 7 +- be/src/olap/row_cursor.h | 6 +- be/src/olap/rowset/beta_rowset.cpp | 6 +- be/src/olap/rowset/beta_rowset_writer.cpp | 4 +- be/src/olap/rowset/segcompaction.cpp | 4 +- be/src/olap/rowset/segment_creator.cpp | 4 +- .../rowset/segment_v2/hierarchical_data_reader.cpp | 2 +- be/src/olap/rowset/segment_v2/segment.cpp | 53 +++++--- be/src/olap/rowset/segment_v2/segment.h | 2 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 12 +- be/src/olap/rowset/segment_v2/segment_writer.cpp | 5 +- .../rowset/segment_v2/vertical_segment_writer.cpp | 5 +- be/src/olap/schema.cpp | 4 +- be/src/olap/schema.h | 24 ++-- be/src/olap/tablet_schema.cpp | 142 +++++++++++---------- be/src/olap/tablet_schema.h | 54 ++++---- be/src/service/internal_service.cpp | 12 +- be/src/vec/common/schema_util.cpp | 70 +++++----- be/src/vec/exec/scan/new_olap_scanner.cpp | 4 +- be/src/vec/json/path_in_data.cpp | 7 +- be/src/vec/json/path_in_data.h | 16 +++ be/src/vec/jsonb/serialize.cpp | 2 +- be/src/vec/olap/olap_data_convertor.cpp | 2 +- be/test/olap/delete_bitmap_calculator_test.cpp | 34 ++--- be/test/olap/memtable_flush_executor_test.cpp | 17 ++- .../olap/rowset/segment_v2/zone_map_index_test.cpp | 18 +-- be/test/olap/tablet_schema_helper.cpp | 111 ++++++++-------- 
be/test/olap/tablet_schema_helper.h | 29 +++-- be/test/vec/exec/vgeneric_iterators_test.cpp | 17 ++- 32 files changed, 374 insertions(+), 315 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index a405e6afa03..a6bed0f40c9 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -253,7 +253,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const PNodeInfo& node_info) auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema(); if (!tablet_schema->skip_write_index_on_load()) { for (auto& column : tablet_schema->columns()) { - const TabletIndex* index_meta = tablet_schema->get_inverted_index(column); + const TabletIndex* index_meta = tablet_schema->get_inverted_index(*column); if (index_meta) { indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix()); } diff --git a/be/src/olap/field.h b/be/src/olap/field.h index be95f1a0e34..6a2d407ff6c 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -32,6 +32,7 @@ #include "util/hash_util.hpp" #include "util/slice.h" #include "vec/common/arena.h" +#include "vec/json/path_in_data.h" namespace doris { @@ -48,7 +49,7 @@ public: _index_size(column.index_length()), _is_nullable(column.is_nullable()), _unique_id(column.unique_id()), - _path(column.path_info()) {} + _path(column.path_info_ptr()) {} virtual ~Field() = default; @@ -58,7 +59,7 @@ public: size_t index_size() const { return _index_size; } int32_t unique_id() const { return _unique_id; } const std::string& name() const { return _name; } - const vectorized::PathInData& path() const { return _path; } + const vectorized::PathInDataPtr& path() const { return _path; } virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } virtual void set_to_zone_map_max(char* buf) const { set_to_max(buf); } @@ -257,7 +258,7 @@ private: int32_t _precision; int32_t _scale; int32_t _unique_id; - vectorized::PathInData _path; + vectorized::PathInDataPtr _path; }; class 
MapField : public Field { diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 5d7fb783239..6800b563fae 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -36,13 +36,6 @@ #include "vec/common/string_ref.h" #include "vec/core/types.h" -template <> -struct std::equal_to<doris::StringRef> { - bool operator()(const doris::StringRef& lhs, const doris::StringRef& rhs) const { - return lhs == rhs; - } -}; - // for uint24_t template <> struct std::hash<doris::uint24_t> { diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index 3bbd699ff27..bf3ae2c5219 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -28,6 +28,7 @@ #include "olap/field.h" #include "olap/olap_common.h" #include "olap/olap_define.h" +#include "olap/tablet_schema.h" #include "util/slice.h" using std::nothrow; @@ -78,7 +79,7 @@ Status RowCursor::_init(const std::shared_ptr<Schema>& shared_schema, return _init(columns); } -Status RowCursor::_init(const std::vector<TabletColumn>& schema, +Status RowCursor::_init(const std::vector<TabletColumnPtr>& schema, const std::vector<uint32_t>& columns) { _schema.reset(new Schema(schema, columns)); return _init(columns); @@ -137,7 +138,7 @@ Status RowCursor::init(TabletSchemaSPtr schema) { return init(schema->columns(), schema->num_columns()); } -Status RowCursor::init(const std::vector<TabletColumn>& schema) { +Status RowCursor::init(const std::vector<TabletColumnPtr>& schema) { return init(schema, schema.size()); } @@ -157,7 +158,7 @@ Status RowCursor::init(TabletSchemaSPtr schema, size_t column_count) { return Status::OK(); } -Status RowCursor::init(const std::vector<TabletColumn>& schema, size_t column_count) { +Status RowCursor::init(const std::vector<TabletColumnPtr>& schema, size_t column_count) { if (column_count > schema.size()) { return Status::Error<INVALID_ARGUMENT>( "Input param are invalid. Column count is bigger than num_columns of schema. 
" diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h index 9f39b5d4f7b..e7d2d016bde 100644 --- a/be/src/olap/row_cursor.h +++ b/be/src/olap/row_cursor.h @@ -46,10 +46,10 @@ public: // Create a RowCursor based on the schema Status init(TabletSchemaSPtr schema); - Status init(const std::vector<TabletColumn>& schema); + Status init(const std::vector<TabletColumnPtr>& schema); // Create a RowCursor based on the first n columns of the schema - Status init(const std::vector<TabletColumn>& schema, size_t column_count); + Status init(const std::vector<TabletColumnPtr>& schema, size_t column_count); Status init(TabletSchemaSPtr schema, size_t column_count); // Create a RowCursor based on the schema and column id list @@ -123,7 +123,7 @@ private: Status _init(const std::shared_ptr<Schema>& shared_schema, const std::vector<uint32_t>& columns); // common init function - Status _init(const std::vector<TabletColumn>& schema, const std::vector<uint32_t>& columns); + Status _init(const std::vector<TabletColumnPtr>& schema, const std::vector<uint32_t>& columns); Status _alloc_buf(); Status _init_scan_key(TabletSchemaSPtr schema, const std::vector<std::string>& scan_keys); diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 057e3411f4f..e3f28726c4d 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -192,7 +192,7 @@ Status BetaRowset::remove() { success = false; } for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column); + const TabletIndex* index_meta = _schema->get_inverted_index(*column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_name( seg_path, index_meta->index_id(), index_meta->get_index_suffix()); @@ -320,7 +320,7 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row RETURN_IF_ERROR(io::global_local_filesystem()->copy_path(src_path, dst_path)); for 
(auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column); + const TabletIndex* index_meta = _schema->get_inverted_index(*column); if (index_meta) { std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_name( @@ -355,7 +355,7 @@ Status BetaRowset::upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_ local_paths.push_back(local_seg_path); for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(column); + const TabletIndex* index_meta = _schema->get_inverted_index(*column); if (index_meta) { std::string remote_inverted_index_file = InvertedIndexDescriptor::get_index_file_name( diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 2b260b7acae..d75f7e44e6e 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -351,8 +351,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u int ret; // rename remaining inverted index files for (auto column : _context.tablet_schema->columns()) { - if (_context.tablet_schema->has_inverted_index(column)) { - auto index_info = _context.tablet_schema->get_inverted_index(column); + if (_context.tablet_schema->has_inverted_index(*column)) { + auto index_info = _context.tablet_schema->get_inverted_index(*column); auto index_id = index_info->index_id(); auto src_idx_path = begin < 0 ? 
InvertedIndexDescriptor::inverted_index_file_path( diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index 81a73b99ebb..57e6e867205 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -134,8 +134,8 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e strings::Substitute("Failed to delete file=$0", seg_path)); // Delete inverted index files for (auto column : schema->columns()) { - if (schema->has_inverted_index(column)) { - auto index_info = schema->get_inverted_index(column); + if (schema->has_inverted_index(*column)) { + auto index_info = schema->get_inverted_index(*column); auto index_id = index_info->index_id(); auto idx_path = InvertedIndexDescriptor::inverted_index_file_path( ctx.rowset_dir, ctx.rowset_id, i, index_id, index_info->get_index_suffix()); diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index 7ee11c2bcf3..8f2553ade59 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -95,7 +95,7 @@ Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block, if (_context->partial_update_info && _context->partial_update_info->is_partial_update) { // check columns that used to do partial updates should not include variant for (int i : _context->partial_update_info->update_cids) { - const auto& col = _context->original_tablet_schema->columns()[i]; + const auto& col = *_context->original_tablet_schema->columns()[i]; if (!col.is_key() && col.name() != DELETE_SIGN) { return Status::InvalidArgument( "Not implement partial update for variant only support delete currently"); @@ -104,7 +104,7 @@ Status SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block, } else { // find positions of variant columns for (int i = 0; i < _context->original_tablet_schema->columns().size(); ++i) { - if 
(_context->original_tablet_schema->columns()[i].is_variant_type()) { + if (_context->original_tablet_schema->columns()[i]->is_variant_type()) { variant_column_pos.push_back(i); } } diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index 1deae3a57dd..2feb865de28 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -182,7 +182,7 @@ Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t nrows // since some other column may depend on it. vectorized::MutableColumnPtr extracted_column; RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b - _col.path_info().copy_pop_front(), extracted_column)); + _col.path_info_ptr()->copy_pop_front(), extracted_column)); if (_target_type_hint != nullptr) { variant.create_root(_target_type_hint, _target_type_hint->create_column()); diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 3d78cd5bd4d..9c392e6088c 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -128,7 +128,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o const TabletColumn& col = read_options.tablet_schema->column(column_id); ColumnReader* reader = nullptr; if (col.is_extracted_column()) { - const auto* node = _sub_column_tree.find_exact(col.path_info()); + const auto* node = _sub_column_tree.find_exact(*col.path_info_ptr()); reader = node != nullptr ? node->data.reader.get() : nullptr; } else { reader = _column_readers.contains(col.unique_id()) @@ -341,12 +341,12 @@ Status Segment::_load_index_impl() { // Return the storage datatype of related column to field. 
// Return nullptr meaning no such storage infomation for this column -vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInData path, bool is_nullable, +vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInDataPtr path, bool is_nullable, bool ignore_children) const { // Path has higher priority - if (!path.empty()) { - auto node = _sub_column_tree.find_leaf(path); - auto sparse_node = _sparse_column_tree.find_exact(path); + if (path != nullptr && !path->empty()) { + auto node = _sub_column_tree.find_leaf(*path); + auto sparse_node = _sparse_column_tree.find_exact(*path); if (node) { if (ignore_children || (node->children.empty() && sparse_node == nullptr)) { return node->data.file_column_type; @@ -398,7 +398,10 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { // init by column path for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { auto& column = _tablet_schema->column(ordinal); - auto iter = column_path_to_footer_ordinal.find(column.path_info()); + if (!column.has_path_info()) { + continue; + } + auto iter = column_path_to_footer_ordinal.find(*column.path_info_ptr()); if (iter == column_path_to_footer_ordinal.end()) { continue; } @@ -469,18 +472,22 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, std::unique_ptr<ColumnIterator>* iter, const StorageReadOptions* opt) { vectorized::PathInData root_path; - if (tablet_column.path_info().empty()) { + if (!tablet_column.has_path_info()) { // Missing path info, but need read the whole variant column root_path = vectorized::PathInData(tablet_column.name_lower_case()); } else { - root_path = vectorized::PathInData({tablet_column.path_info().get_parts()[0]}); + root_path = vectorized::PathInData({tablet_column.path_info_ptr()->get_parts()[0]}); } auto root = _sub_column_tree.find_leaf(root_path); - auto node = _sub_column_tree.find_exact(tablet_column.path_info()); - auto sparse_node = 
_sparse_column_tree.find_exact(tablet_column.path_info()); + auto node = tablet_column.has_path_info() + ? _sub_column_tree.find_exact(*tablet_column.path_info_ptr()) + : nullptr; + auto sparse_node = tablet_column.has_path_info() + ? _sparse_column_tree.find_exact(*tablet_column.path_info_ptr()) + : nullptr; if (opt != nullptr && opt->io_ctx.reader_type == ReaderType::READER_ALTER_TABLE) { CHECK(tablet_column.is_variant_type()); - if (node == nullptr) { + if (root == nullptr) { // No such variant column in this segment, get a default one RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); return Status::OK(); @@ -490,13 +497,15 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, // subcolumns of variant during processing rewriting rowsets. // This is slow, since it needs to read all sub columns and merge them into a single column RETURN_IF_ERROR( - HierarchicalDataReader::create(iter, root_path, node, root, output_as_raw_json)); + HierarchicalDataReader::create(iter, root_path, root, root, output_as_raw_json)); return Status::OK(); } if (opt == nullptr || opt->io_ctx.reader_type != ReaderType::READER_QUERY) { // Could be compaction ..etc and read flat leaves nodes data - const auto* node = _sub_column_tree.find_leaf(tablet_column.path_info()); + const auto* node = tablet_column.has_path_info() + ? 
_sub_column_tree.find_leaf(*tablet_column.path_info_ptr()) + : nullptr; if (!node) { // sparse_columns have this path, read from root if (sparse_node != nullptr && sparse_node->is_leaf_node()) { @@ -517,15 +526,15 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, if (node->is_leaf_node() && sparse_node == nullptr) { // Node contains column without any child sub columns and no corresponding sparse columns // Direct read extracted columns - const auto* node = _sub_column_tree.find_leaf(tablet_column.path_info()); + const auto* node = _sub_column_tree.find_leaf(*tablet_column.path_info_ptr()); ColumnIterator* it; RETURN_IF_ERROR(node->data.reader->new_iterator(&it)); iter->reset(it); } else { // Node contains column with children columns or has correspoding sparse columns // Create reader with hirachical data - RETURN_IF_ERROR( - HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root)); + RETURN_IF_ERROR(HierarchicalDataReader::create(iter, *tablet_column.path_info_ptr(), + node, root)); } } else { // No such node, read from either sparse column or default column @@ -553,7 +562,7 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column, std::unique_ptr<ColumnIterator>* iter, const StorageReadOptions* opt) { // init column iterator by path info - if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) { + if (tablet_column.has_path_info() || tablet_column.is_variant_type()) { return new_column_iterator_with_path(tablet_column, iter, opt); } // init default iterator @@ -587,8 +596,9 @@ Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr<ColumnIte ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { // init column iterator by path info - if (!col.path_info().empty() || col.is_variant_type()) { - auto node = _sub_column_tree.find_exact(col.path_info()); + if (col.has_path_info() || col.is_variant_type()) { + auto node = + col.has_path_info() ? 
_sub_column_tree.find_exact(*col.path_info_ptr()) : nullptr; if (node != nullptr) { return node->data.reader.get(); } @@ -770,8 +780,9 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto }; std::vector<segment_v2::rowid_t> single_row_loc {row_id}; if (!slot->column_paths().empty()) { - vectorized::PathInData path(schema.column_by_uid(slot->col_unique_id()).name_lower_case(), - slot->column_paths()); + vectorized::PathInDataPtr path = std::make_shared<vectorized::PathInData>( + schema.column_by_uid(slot->col_unique_id()).name_lower_case(), + slot->column_paths()); auto storage_type = get_data_type_of(path, slot->is_nullable(), false); vectorized::MutableColumnPtr file_storage_column = storage_type->create_column(); DCHECK(storage_type != nullptr); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index dc0f18ff02d..af5d1896f47 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -153,7 +153,7 @@ public: // ignore_chidren set to false will treat field as variant // when it contains children with field paths. 
// nullptr will returned if storage type does not contains such column - std::shared_ptr<const vectorized::IDataType> get_data_type_of(vectorized::PathInData path, + std::shared_ptr<const vectorized::IDataType> get_data_type_of(vectorized::PathInDataPtr path, bool is_nullable, bool ignore_children) const; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index e8e1aa386a5..fe2ee5c7312 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1782,7 +1782,8 @@ void SegmentIterator::_init_current_block( "Recreate column with expected type {}, file column type {}, col_name {}, " "col_path {}", block->get_by_position(i).type->get_name(), file_column_type->get_name(), - column_desc->name(), column_desc->path().get_path()); + column_desc->name(), + column_desc->path() == nullptr ? "" : column_desc->path()->get_path()); // TODO reuse current_columns[cid] = file_column_type->create_column(); current_columns[cid]->reserve(_opts.block_row_max); @@ -2104,10 +2105,11 @@ Status SegmentIterator::_convert_to_expected_type(const std::vector<ColumnId>& c expected_type, &expected)); _current_return_columns[i] = expected->assume_mutable(); _converted_column_ids[i] = 1; - VLOG_DEBUG << fmt::format("Convert {} fom file column type {} to {}, num_rows {}", - field_type->path().get_path(), file_column_type->get_name(), - expected_type->get_name(), - _current_return_columns[i]->size()); + VLOG_DEBUG << fmt::format( + "Convert {} fom file column type {} to {}, num_rows {}", + field_type->path() == nullptr ? 
"" : field_type->path()->get_path(), + file_column_type->get_name(), expected_type->get_name(), + _current_return_columns[i]->size()); } } return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 27a744278df..283b96080ab 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -149,8 +149,9 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t column_id, meta->set_default_value(column.default_value()); meta->set_precision(column.precision()); meta->set_frac(column.frac()); - if (!column.path_info().empty()) { - column.path_info().to_protobuf(meta->mutable_column_path_info(), column.parent_unique_id()); + if (column.has_path_info()) { + column.path_info_ptr()->to_protobuf(meta->mutable_column_path_info(), + column.parent_unique_id()); } meta->set_unique_id(column.unique_id()); for (uint32_t i = 0; i < column.get_subtype_count(); ++i) { diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index a7c47197473..a7e0dc9f241 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -121,8 +121,9 @@ void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB* meta, uint32_t colum meta->set_default_value(column.default_value()); meta->set_precision(column.precision()); meta->set_frac(column.frac()); - if (!column.path_info().empty()) { - column.path_info().to_protobuf(meta->mutable_column_path_info(), column.parent_unique_id()); + if (column.has_path_info()) { + column.path_info_ptr()->to_protobuf(meta->mutable_column_path_info(), + column.parent_unique_id()); } meta->set_unique_id(column.unique_id()); for (uint32_t i = 0; i < column.get_subtype_count(); ++i) { diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index 5cfa1d5e381..28c12b999d0 100644 --- 
a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -67,7 +67,7 @@ void Schema::_copy_from(const Schema& other) { } } -void Schema::_init(const std::vector<TabletColumn>& cols, const std::vector<ColumnId>& col_ids, +void Schema::_init(const std::vector<TabletColumnPtr>& cols, const std::vector<ColumnId>& col_ids, size_t num_key_columns) { _col_ids = col_ids; _num_key_columns = num_key_columns; @@ -81,7 +81,7 @@ void Schema::_init(const std::vector<TabletColumn>& cols, const std::vector<Colu if (col_id_set.find(cid) == col_id_set.end()) { continue; } - _cols[cid] = FieldFactory::create(cols[cid]); + _cols[cid] = FieldFactory::create(*cols[cid]); _col_offsets[cid] = offset; // Plus 1 byte for null byte diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index 64d6a7544b5..6414db4153a 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -52,12 +52,12 @@ public: Schema(TabletSchemaSPtr tablet_schema) { size_t num_columns = tablet_schema->num_columns(); // ignore this column - if (tablet_schema->columns().back().name() == BeConsts::ROW_STORE_COL) { + if (tablet_schema->columns().back()->name() == BeConsts::ROW_STORE_COL) { --num_columns; } std::vector<ColumnId> col_ids(num_columns); _unique_ids.resize(num_columns); - std::vector<TabletColumn> columns; + std::vector<TabletColumnPtr> columns; columns.reserve(num_columns); size_t num_key_columns = 0; @@ -74,7 +74,7 @@ public: if (column.name() == VERSION_COL) { _version_col_idx = cid; } - columns.push_back(column); + columns.push_back(std::make_shared<TabletColumn>(column)); } _delete_sign_idx = tablet_schema->delete_sign_idx(); if (tablet_schema->has_sequence_col()) { @@ -84,34 +84,34 @@ public: } // All the columns of one table may exist in the columns param, but col_ids is only a subset. 
- Schema(const std::vector<TabletColumn>& columns, const std::vector<ColumnId>& col_ids) { + Schema(const std::vector<TabletColumnPtr>& columns, const std::vector<ColumnId>& col_ids) { size_t num_key_columns = 0; _unique_ids.resize(columns.size()); for (size_t i = 0; i < columns.size(); ++i) { - if (columns[i].is_key()) { + if (columns[i]->is_key()) { ++num_key_columns; } - if (columns[i].name() == DELETE_SIGN) { + if (columns[i]->name() == DELETE_SIGN) { _delete_sign_idx = i; } - if (columns[i].name() == BeConsts::ROWID_COL) { + if (columns[i]->name() == BeConsts::ROWID_COL) { _rowid_col_idx = i; } - if (columns[i].name() == VERSION_COL) { + if (columns[i]->name() == VERSION_COL) { _version_col_idx = i; } - _unique_ids[i] = columns[i].unique_id(); + _unique_ids[i] = columns[i]->unique_id(); } _init(columns, col_ids, num_key_columns); } // Only for UT - Schema(const std::vector<TabletColumn>& columns, size_t num_key_columns) { + Schema(const std::vector<TabletColumnPtr>& columns, size_t num_key_columns) { std::vector<ColumnId> col_ids(columns.size()); _unique_ids.resize(columns.size()); for (uint32_t cid = 0; cid < columns.size(); ++cid) { col_ids[cid] = cid; - _unique_ids[cid] = columns[cid].unique_id(); + _unique_ids[cid] = columns[cid]->unique_id(); } _init(columns, col_ids, num_key_columns); @@ -183,7 +183,7 @@ public: int64_t mem_size() const { return _mem_size; } private: - void _init(const std::vector<TabletColumn>& cols, const std::vector<ColumnId>& col_ids, + void _init(const std::vector<TabletColumnPtr>& cols, const std::vector<ColumnId>& col_ids, size_t num_key_columns); void _init(const std::vector<const Field*>& cols, const std::vector<ColumnId>& col_ids, size_t num_key_columns); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 80108d41f54..19c107971bc 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -45,9 +45,11 @@ #include "vec/aggregate_functions/aggregate_function_simple_factory.h" 
#include "vec/aggregate_functions/aggregate_function_state_union.h" #include "vec/common/hex.h" +#include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/json/path_in_data.h" namespace doris { @@ -554,13 +556,14 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { add_sub_column(child_column); } if (column.has_column_path_info()) { - _column_path.from_protobuf(column.column_path_info()); + _column_path = std::make_shared<vectorized::PathInData>(); + _column_path->from_protobuf(column.column_path_info()); _parent_col_unique_id = column.column_path_info().parrent_column_unique_id(); } for (auto& column_pb : column.sparse_columns()) { TabletColumn column; column.init_from_pb(column_pb); - _sparse_cols.emplace_back(std::move(column)); + _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column))); _num_sparse_columns++; } } @@ -615,13 +618,13 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { for (size_t i = 0; i < _sub_columns.size(); i++) { ColumnPB* child = column->add_children_columns(); - _sub_columns[i].to_schema_pb(child); + _sub_columns[i]->to_schema_pb(child); } // set parts info - if (!_column_path.empty()) { + if (has_path_info()) { // CHECK_GT(_parent_col_unique_id, 0); - _column_path.to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id); + _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id); // Update unstable information for variant columns. Some of the fields in the tablet schema // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth // in the number of tablet schema cache entries. 
@@ -632,12 +635,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { } for (auto& col : _sparse_cols) { ColumnPB* sparse_column = column->add_sparse_columns(); - col.to_schema_pb(sparse_column); + col->to_schema_pb(sparse_column); } } void TabletColumn::add_sub_column(TabletColumn& sub_column) { - _sub_columns.push_back(sub_column); + _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column)); sub_column._parent_col_unique_id = this->_unique_id; _sub_column_count += 1; } @@ -674,7 +677,7 @@ vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(std::strin } void TabletColumn::set_path_info(const vectorized::PathInData& path) { - _column_path = path; + _column_path = std::make_shared<vectorized::PathInData>(path); } vectorized::DataTypePtr TabletColumn::get_vec_type() const { @@ -807,12 +810,11 @@ void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { } if (column.is_variant_type()) { ++_num_variant_columns; - if (column.path_info().empty()) { + if (!column.has_path_info()) { const std::string& col_name = column.name_lower_case(); vectorized::PathInData path(col_name); column.set_path_info(path); } - _field_path_to_index[column.path_info()] = _num_columns; } if (UNLIKELY(column.name() == DELETE_SIGN)) { _delete_sign_idx = _num_columns; @@ -821,21 +823,21 @@ void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { } else if (UNLIKELY(column.name() == VERSION_COL)) { _version_col_idx = _num_columns; } + _field_id_to_index[column.unique_id()] = _num_columns; + _cols.push_back(std::make_shared<TabletColumn>(std::move(column))); // The dropped column may have same name with exsiting column, so that // not add to name to index map, only for uid to index map - if (col_type == ColumnType::NORMAL) { - _field_name_to_index[column.name()] = _num_columns; - } else if (col_type == ColumnType::VARIANT) { - _field_name_to_index[column.name()] = _num_columns; - _field_path_to_index[column.path_info()] = 
_num_columns; + if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type()) { + _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); + _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns; + } else if (col_type == ColumnType::NORMAL) { + _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); } - _field_id_to_index[column.unique_id()] = _num_columns; - _cols.push_back(std::move(column)); _num_columns++; } void TabletColumn::append_sparse_column(TabletColumn column) { - _sparse_cols.push_back(std::move(column)); + _sparse_cols.push_back(std::make_shared<TabletColumn>(std::move(column))); _num_sparse_columns++; } @@ -846,7 +848,7 @@ void TabletSchema::append_index(TabletIndex index) { void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { int32_t col_unique_id = col.unique_id(); const std::string& suffix_path = - !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; + col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : ""; for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t id : _indexes[i].col_unique_ids()) { if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { @@ -856,6 +858,11 @@ void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { } } +void TabletSchema::replace_column(size_t pos, TabletColumn new_col) { + CHECK_LT(pos, num_columns()) << " outof range"; + _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col)); +} + void TabletSchema::clear_index() { _indexes.clear(); } @@ -911,9 +918,9 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac if (column.is_variant_type()) { ++_num_variant_columns; } - _field_name_to_index[column.name()] = _num_columns; - _field_id_to_index[column.unique_id()] = _num_columns; - _cols.emplace_back(std::move(column)); + _cols.emplace_back(std::make_shared<TabletColumn>(std::move(column))); + _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); + _field_id_to_index[_cols.back()->unique_id()] = _num_columns; _num_columns++; } for (auto& index_pb : schema.index()) { @@ -1016,9 +1023,9 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version } else if (UNLIKELY(column->name() == VERSION_COL)) { _version_col_idx = _num_columns; } - _field_name_to_index[column->name()] = _num_columns; - _field_id_to_index[column->unique_id()] = _num_columns; - _cols.emplace_back(*column); + _cols.emplace_back(std::make_shared<TabletColumn>(*column)); + _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); + _field_id_to_index[_cols.back()->unique_id()] = _num_columns; _num_columns++; } @@ -1041,12 +1048,13 @@ void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) { return; } for (const auto& src_col : src_schema.columns()) { - if (_field_id_to_index.find(src_col.unique_id()) == _field_id_to_index.end()) { - CHECK(!src_col.is_key()) 
<< src_col.name() << " is key column, should not be dropped."; + if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) { + CHECK(!src_col->is_key()) + << src_col->name() << " is key column, should not be dropped."; ColumnPB src_col_pb; // There are some pointer in tablet column, not sure the reference relation, so // that deep copy it. - src_col.to_schema_pb(&src_col_pb); + src_col->to_schema_pb(&src_col_pb); TabletColumn new_col(src_col_pb); append_column(new_col, TabletSchema::ColumnType::DROPPED); } @@ -1067,21 +1075,21 @@ bool TabletSchema::is_dropped_column(const TabletColumn& col) const { CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end()) << "could not find col with unique id = " << col.unique_id() << " and name = " << col.name(); - return _field_name_to_index.find(col.name()) == _field_name_to_index.end() || + return _field_name_to_index.find(StringRef(col.name())) == _field_name_to_index.end() || column(col.name()).unique_id() != col.unique_id(); } void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) { std::unordered_set<int32_t> variant_columns; for (const auto& col : columns()) { - if (col.is_variant_type()) { - variant_columns.insert(col.unique_id()); + if (col->is_variant_type()) { + variant_columns.insert(col->unique_id()); } } - for (const TabletColumn& col : src_schema.columns()) { - if (col.is_extracted_column() && variant_columns.contains(col.parent_unique_id())) { + for (const TabletColumnPtr& col : src_schema.columns()) { + if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) { ColumnPB col_pb; - col.to_schema_pb(&col_pb); + col->to_schema_pb(&col_pb); TabletColumn new_col(col_pb); append_column(new_col, ColumnType::VARIANT); } @@ -1090,7 +1098,7 @@ void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) { void TabletSchema::reserve_extracted_columns() { for (auto it = _cols.begin(); it != _cols.end();) { - if 
(!it->is_extracted_column()) { + if (!(*it)->is_extracted_column()) { it = _cols.erase(it); } else { ++it; @@ -1103,12 +1111,12 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { tablet_schema_pb->add_cluster_key_idxes(i); } tablet_schema_pb->set_keys_type(_keys_type); - for (auto& col : _cols) { + for (const auto& col : _cols) { ColumnPB* column = tablet_schema_pb->add_column(); - col.to_schema_pb(column); + col->to_schema_pb(column); } - for (auto& index : _indexes) { - auto index_pb = tablet_schema_pb->add_index(); + for (const auto& index : _indexes) { + auto* index_pb = tablet_schema_pb->add_index(); index.to_schema_pb(index_pb); } tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns); @@ -1134,8 +1142,8 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { size_t TabletSchema::row_size() const { size_t size = 0; - for (auto& column : _cols) { - size += column.length(); + for (const auto& column : _cols) { + size += column->length(); } size += (_num_columns + 7) / 8; @@ -1143,12 +1151,12 @@ size_t TabletSchema::row_size() const { } int32_t TabletSchema::field_index(const std::string& field_name) const { - const auto& found = _field_name_to_index.find(field_name); + const auto& found = _field_name_to_index.find(StringRef(field_name)); return (found == _field_name_to_index.end()) ? -1 : found->second; } int32_t TabletSchema::field_index(const vectorized::PathInData& path) const { - const auto& found = _field_path_to_index.find(path); + const auto& found = _field_path_to_index.find(vectorized::PathInDataRef(&path)); return (found == _field_path_to_index.end()) ? -1 : found->second; } @@ -1157,35 +1165,31 @@ int32_t TabletSchema::field_index(int32_t col_unique_id) const { return (found == _field_id_to_index.end()) ? 
-1 : found->second; } -const std::vector<TabletColumn>& TabletSchema::columns() const { +const std::vector<TabletColumnPtr>& TabletSchema::columns() const { return _cols; } -const std::vector<TabletColumn>& TabletColumn::sparse_columns() const { +const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const { return _sparse_cols; } -std::vector<TabletColumn>& TabletSchema::mutable_columns() { - return _cols; -} - const TabletColumn& TabletSchema::column(size_t ordinal) const { DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; - return _cols[ordinal]; + return *_cols[ordinal]; } const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const { DCHECK(ordinal < _sparse_cols.size()) << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size(); - return _sparse_cols[ordinal]; + return *_sparse_cols[ordinal]; } const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const { - return _cols.at(_field_id_to_index.at(col_unique_id)); + return *_cols.at(_field_id_to_index.at(col_unique_id)); } TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) { - return _cols.at(_field_id_to_index.at(col_unique_id)); + return *_cols.at(_field_id_to_index.at(col_unique_id)); } void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) { @@ -1199,7 +1203,7 @@ void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTabl } Status TabletSchema::have_column(const std::string& field_name) const { - if (!_field_name_to_index.contains(field_name)) { + if (!_field_name_to_index.contains(StringRef(field_name))) { return Status::Error<ErrorCode::INTERNAL_ERROR>( "Not found field_name, field_name:{}, schema:{}", field_name, get_all_field_names()); @@ -1208,10 +1212,10 @@ Status TabletSchema::have_column(const std::string& field_name) const { } const TabletColumn& TabletSchema::column(const std::string& field_name) const { - 
DCHECK(_field_name_to_index.contains(field_name)) + DCHECK(_field_name_to_index.contains(StringRef(field_name)) != 0) << ", field_name=" << field_name << ", field_name_to_index=" << get_all_field_names(); - const auto& found = _field_name_to_index.find(field_name); - return _cols[found->second]; + const auto& found = _field_name_to_index.find(StringRef(field_name)); + return *_cols[found->second]; } std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column( @@ -1219,7 +1223,7 @@ std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column( std::vector<const TabletIndex*> indexes_for_column; int32_t col_unique_id = col.unique_id(); const std::string& suffix_path = - !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; + col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; // TODO use more efficient impl for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t id : _indexes[i].col_unique_ids()) { @@ -1247,7 +1251,7 @@ bool TabletSchema::has_inverted_index(const TabletColumn& col) const { // TODO use more efficient impl int32_t col_unique_id = col.unique_id(); const std::string& suffix_path = - !col.path_info().empty() ? escape_for_path_name(col.path_info().get_path()) : ""; + col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; for (size_t i = 0; i < _indexes.size(); i++) { if (_indexes[i].index_type() == IndexType::INVERTED) { for (int32_t id : _indexes[i].col_unique_ids()) { @@ -1269,7 +1273,6 @@ bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id, return true; } } - return false; } @@ -1305,7 +1308,7 @@ const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col) con // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants int32_t col_unique_id = col.unique_id() < 0 ? col.parent_unique_id() : col.unique_id(); const std::string& suffix_path = - !col.path_info().empty() ? 
escape_for_path_name(col.path_info().get_path()) : ""; + col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; return get_inverted_index(col_unique_id, suffix_path); } @@ -1344,7 +1347,7 @@ vectorized::Block TabletSchema::create_block( const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const { vectorized::Block block; for (int i = 0; i < return_columns.size(); ++i) { - const auto& col = _cols[return_columns[i]]; + const auto& col = *_cols[return_columns[i]]; bool is_nullable = (tablet_columns_need_convert_null != nullptr && tablet_columns_need_convert_null->find(return_columns[i]) != tablet_columns_need_convert_null->end()); @@ -1358,11 +1361,11 @@ vectorized::Block TabletSchema::create_block( vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const { vectorized::Block block; for (const auto& col : _cols) { - if (ignore_dropped_col && is_dropped_column(col)) { + if (ignore_dropped_col && is_dropped_column(*col)) { continue; } - auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col); - block.insert({data_type->create_column(), data_type, col.name()}); + auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col); + block.insert({data_type->create_column(), data_type, col->name()}); } return block; } @@ -1370,7 +1373,7 @@ vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const { vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) { vectorized::Block block; for (const auto& cid : cids) { - auto col = _cols[cid]; + const auto& col = *_cols[cid]; auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col); block.insert({data_type->create_column(), data_type, col.name()}); } @@ -1397,6 +1400,11 @@ bool operator==(const TabletColumn& a, const TabletColumn& b) { if (a._index_length != b._index_length) return false; if (a._is_bf_column != b._is_bf_column) return false; if (a._has_bitmap_index 
!= b._has_bitmap_index) return false; + if (a._column_path == nullptr && b._column_path != nullptr) return false; + if (b._column_path == nullptr && a._column_path != nullptr) return false; + if (b._column_path != nullptr && a._column_path != nullptr && + *a._column_path != *b._column_path) + return false; return true; } @@ -1408,7 +1416,7 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) { if (a._keys_type != b._keys_type) return false; if (a._cols.size() != b._cols.size()) return false; for (int i = 0; i < a._cols.size(); ++i) { - if (a._cols[i] != b._cols[i]) return false; + if (*a._cols[i] != *b._cols[i]) return false; } if (a._num_columns != b._num_columns) return false; if (a._num_key_columns != b._num_key_columns) return false; diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 78f74a761f0..40857ab7427 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -21,6 +21,7 @@ #include <gen_cpp/olap_common.pb.h> #include <gen_cpp/olap_file.pb.h> #include <gen_cpp/segment_v2.pb.h> +#include <parallel_hashmap/phmap.h> #include <stddef.h> #include <stdint.h> @@ -39,7 +40,9 @@ #include "runtime/descriptors.h" #include "util/string_util.h" #include "vec/aggregate_functions/aggregate_function.h" +#include "vec/common/string_ref.h" #include "vec/common/string_utils/string_utils.h" +#include "vec/core/types.h" #include "vec/json/path_in_data.h" namespace doris { @@ -52,6 +55,9 @@ class IDataType; struct OlapTableIndexSchema; class TColumn; class TOlapTableIndex; +class TabletColumn; + +using TabletColumnPtr = std::shared_ptr<TabletColumn>; class TabletColumn { public: @@ -129,8 +135,8 @@ public: void add_sub_column(TabletColumn& sub_column); uint32_t get_subtype_count() const { return _sub_column_count; } - const TabletColumn& get_sub_column(uint32_t i) const { return _sub_columns[i]; } - const std::vector<TabletColumn>& get_sub_columns() const { return _sub_columns; } + const TabletColumn& 
get_sub_column(uint32_t i) const { return *_sub_columns[i]; } + const std::vector<TabletColumnPtr>& get_sub_columns() const { return _sub_columns; } friend bool operator==(const TabletColumn& a, const TabletColumn& b); friend bool operator!=(const TabletColumn& a, const TabletColumn& b); @@ -144,16 +150,19 @@ public: bool is_row_store_column() const; std::string get_aggregation_name() const { return _aggregation_name; } bool get_result_is_nullable() const { return _result_is_nullable; } - const vectorized::PathInData& path_info() const { return _column_path; } + bool has_path_info() const { return _column_path != nullptr && !_column_path->empty(); } + const vectorized::PathInDataPtr& path_info_ptr() const { return _column_path; } // If it is an extracted column from variant column - bool is_extracted_column() const { return !_column_path.empty() && _parent_col_unique_id > 0; }; + bool is_extracted_column() const { + return _column_path != nullptr && !_column_path->empty() && _parent_col_unique_id > 0; + }; int32_t parent_unique_id() const { return _parent_col_unique_id; } void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id = col_unique_id; } std::shared_ptr<const vectorized::IDataType> get_vec_type() const; void append_sparse_column(TabletColumn column); const TabletColumn& sparse_column_at(size_t oridinal) const; - const std::vector<TabletColumn>& sparse_columns() const; + const std::vector<TabletColumnPtr>& sparse_columns() const; size_t num_sparse_columns() const { return _num_sparse_columns; } private: @@ -175,29 +184,30 @@ private: std::string _default_value; bool _is_decimal = false; - int32_t _precision; - int32_t _frac; + int32_t _precision = -1; + int32_t _frac = -1; - int32_t _length; - int32_t _index_length; + int32_t _length = -1; + int32_t _index_length = -1; bool _is_bf_column = false; bool _has_bitmap_index = false; bool _visible = true; int32_t _parent_col_unique_id = -1; - std::vector<TabletColumn> _sub_columns; + 
std::vector<TabletColumnPtr> _sub_columns; uint32_t _sub_column_count = 0; bool _result_is_nullable = false; - vectorized::PathInData _column_path; + vectorized::PathInDataPtr _column_path; // Record information about columns merged into a sparse column within a variant // `{"id": 100, "name" : "jack", "point" : 3.9}` // If the information mentioned above is inserted into the variant column, // 'id' and 'name' are correctly extracted, while 'point' is merged into the sparse column due to its sparsity. // The path_info and type of 'point' will be recorded using the TabletColumn. - std::vector<TabletColumn> _sparse_cols; + // Use shared_ptr for reuse and reducing column memory usage + std::vector<TabletColumnPtr> _sparse_cols; size_t _num_sparse_columns = 0; }; @@ -239,7 +249,7 @@ public: void set_escaped_escaped_index_suffix_path(const std::string& name); private: - int64_t _index_id; + int64_t _index_id = -1; // Identify the different index with the same _index_id std::string _escaped_index_suffix_path; std::string _index_name; @@ -282,8 +292,8 @@ public: Status have_column(const std::string& field_name) const; const TabletColumn& column_by_uid(int32_t col_unique_id) const; TabletColumn& mutable_column_by_uid(int32_t col_unique_id); - const std::vector<TabletColumn>& columns() const; - std::vector<TabletColumn>& mutable_columns(); + void replace_column(size_t pos, TabletColumn new_col); + const std::vector<TabletColumnPtr>& columns() const; size_t num_columns() const { return _num_columns; } size_t num_key_columns() const { return _num_key_columns; } const std::vector<uint32_t>& cluster_key_idxes() const { return _cluster_key_idxes; } @@ -380,7 +390,7 @@ public: if (str.size() > 1) { str += ", "; } - str += p.first + "(" + std::to_string(_cols[p.second].unique_id()) + ")"; + str += p.first.to_string() + "(" + std::to_string(_cols[p.second]->unique_id()) + ")"; } str += "]"; return str; @@ -394,12 +404,12 @@ public: str += ", "; } str += "("; - str += p.name(); + 
str += p->name(); str += ", "; - str += TabletColumn::get_string_by_field_type(p.type()); + str += TabletColumn::get_string_by_field_type(p->type()); str += ", "; str += "is_nullable:"; - str += (p.is_nullable() ? "true" : "false"); + str += (p->is_nullable() ? "true" : "false"); str += ")"; } str += "]"; @@ -420,12 +430,12 @@ private: KeysType _keys_type = DUP_KEYS; SortType _sort_type = SortType::LEXICAL; size_t _sort_col_num = 0; - std::vector<TabletColumn> _cols; + std::vector<TabletColumnPtr> _cols; std::vector<TabletIndex> _indexes; - std::unordered_map<std::string, int32_t> _field_name_to_index; + std::unordered_map<StringRef, int32_t, StringRefHash> _field_name_to_index; std::unordered_map<int32_t, int32_t> _field_id_to_index; - std::unordered_map<vectorized::PathInData, int32_t, vectorized::PathInData::Hash> + std::unordered_map<vectorized::PathInDataRef, int32_t, vectorized::PathInDataRef::Hash> _field_path_to_index; size_t _num_columns = 0; size_t _num_variant_columns = 0; diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index f06446123d0..c81a5e90c3c 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -816,24 +816,24 @@ void PInternalServiceImpl::_get_column_ids_by_tablet_ids( std::set<int32_t> column_ids; for (const auto& col : columns) { - column_ids.insert(col.unique_id()); + column_ids.insert(col->unique_id()); } filter_set.insert(std::move(column_ids)); if (id_to_column.empty()) { for (const auto& col : columns) { - id_to_column.insert(std::pair {col.unique_id(), &col}); + id_to_column.insert(std::pair {col->unique_id(), col.get()}); } } else { for (const auto& col : columns) { - auto it = id_to_column.find(col.unique_id()); - if (it == id_to_column.end() || *(it->second) != col) { + auto it = id_to_column.find(col->unique_id()); + if (it == id_to_column.end() || *(it->second) != *col) { ColumnPB prev_col_pb; ColumnPB curr_col_pb; if (it != id_to_column.end()) { 
it->second->to_schema_pb(&prev_col_pb); } - col.to_schema_pb(&curr_col_pb); + col->to_schema_pb(&curr_col_pb); std::stringstream ss; ss << "consistency check failed: index{ " << index_id << " }" << " got inconsistent schema, prev column: " << prev_col_pb.DebugString() @@ -864,7 +864,7 @@ void PInternalServiceImpl::_get_column_ids_by_tablet_ids( entry->set_index_id(index_id); auto col_name_to_id = entry->mutable_col_name_to_id(); for (const auto& column : columns) { - (*col_name_to_id)[column.name()] = column.unique_id(); + (*col_name_to_id)[column->name()] = column->unique_id(); } } response->mutable_status()->set_status_code(TStatusCode::OK); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index b3fa9234cb3..98148fa55bc 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -314,12 +314,12 @@ void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas, // Types of subcolumns by path from all tuples. std::map<PathInData, DataTypes> subcolumns_types; for (const TabletSchemaSPtr& schema : schemas) { - for (const TabletColumn& col : schema->columns()) { + for (const TabletColumnPtr& col : schema->columns()) { // Get subcolumns of this variant - if (!col.path_info().empty() && col.parent_unique_id() > 0 && - col.parent_unique_id() == variant_col_unique_id) { - subcolumns_types[col.path_info()].push_back( - DataTypeFactory::instance().create_data_type(col, col.is_nullable())); + if (col->has_path_info() && col->parent_unique_id() > 0 && + col->parent_unique_id() == variant_col_unique_id) { + subcolumns_types[*col->path_info_ptr()].push_back( + DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } } @@ -328,15 +328,15 @@ void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas, // maybe dropped continue; } - for (const TabletColumn& col : - schema->mutable_column_by_uid(variant_col_unique_id).sparse_columns()) { + for (const TabletColumnPtr& 
col : + schema->column_by_uid(variant_col_unique_id).sparse_columns()) { // Get subcolumns of this variant - if (!col.path_info().empty() && col.parent_unique_id() > 0 && - col.parent_unique_id() == variant_col_unique_id && + if (col->has_path_info() && col->parent_unique_id() > 0 && + col->parent_unique_id() == variant_col_unique_id && // this column have been found in origin columns - subcolumns_types.find(col.path_info()) != subcolumns_types.end()) { - subcolumns_types[col.path_info()].push_back( - DataTypeFactory::instance().create_data_type(col, col.is_nullable())); + subcolumns_types.find(*col->path_info_ptr()) != subcolumns_types.end()) { + subcolumns_types[*col->path_info_ptr()].push_back( + DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } } @@ -354,14 +354,14 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas, // maybe dropped continue; } - for (const TabletColumn& col : - schema->mutable_column_by_uid(variant_col_unique_id).sparse_columns()) { + for (const TabletColumnPtr& col : + schema->column_by_uid(variant_col_unique_id).sparse_columns()) { // Get subcolumns of this variant - if (!col.path_info().empty() && col.parent_unique_id() > 0 && - col.parent_unique_id() == variant_col_unique_id && - path_set.find(col.path_info()) == path_set.end()) { - subcolumns_types[col.path_info()].push_back( - DataTypeFactory::instance().create_data_type(col, col.is_nullable())); + if (col->has_path_info() && col->parent_unique_id() > 0 && + col->parent_unique_id() == variant_col_unique_id && + path_set.find(*col->path_info_ptr()) == path_set.end()) { + subcolumns_types[*col->path_info_ptr()].push_back( + DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } } @@ -372,29 +372,29 @@ void inherit_tablet_index(TabletSchemaSPtr& schema) { std::unordered_map<int32_t, TabletIndex> variants_index_meta; // Get all variants tablet index metas if exist for (const auto& col : schema->columns()) { - auto 
index_meta = schema->get_inverted_index(col.unique_id(), ""); - if (col.is_variant_type() && index_meta != nullptr) { - variants_index_meta.emplace(col.unique_id(), *index_meta); + auto index_meta = schema->get_inverted_index(col->unique_id(), ""); + if (col->is_variant_type() && index_meta != nullptr) { + variants_index_meta.emplace(col->unique_id(), *index_meta); } } // Add index meta if extracted column is missing index meta for (const auto& col : schema->columns()) { - if (!col.is_extracted_column()) { + if (!col->is_extracted_column()) { continue; } - auto it = variants_index_meta.find(col.parent_unique_id()); + auto it = variants_index_meta.find(col->parent_unique_id()); // variant has no index meta, ignore if (it == variants_index_meta.end()) { continue; } - auto index_meta = schema->get_inverted_index(col); + auto index_meta = schema->get_inverted_index(*col); // add index meta TabletIndex index_info = it->second; - index_info.set_escaped_escaped_index_suffix_path(col.path_info().get_path()); + index_info.set_escaped_escaped_index_suffix_path(col->path_info_ptr()->get_path()); if (index_meta != nullptr) { // already exist - schema->update_index(col, index_info); + schema->update_index(*col, index_info); } else { schema->append_index(index_info); } @@ -415,12 +415,12 @@ Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas, // Merge columns from other schemas output_schema->clear_columns(); // Get all columns without extracted columns and collect variant col unique id - for (const TabletColumn& col : base_schema->columns()) { - if (col.is_variant_type()) { - variant_column_unique_id.push_back(col.unique_id()); + for (const TabletColumnPtr& col : base_schema->columns()) { + if (col->is_variant_type()) { + variant_column_unique_id.push_back(col->unique_id()); } - if (!col.is_extracted_column()) { - output_schema->append_column(col); + if (!col->is_extracted_column()) { + output_schema->append_column(*col); } } }; @@ -646,7 +646,7 @@ void 
rebuild_schema_and_block(const TabletSchemaSPtr& original, bool is_nullable = column_ref->is_nullable(); const vectorized::ColumnObject& object_column = assert_cast<vectorized::ColumnObject&>( remove_nullable(column_ref)->assume_mutable_ref()); - const TabletColumn& parent_column = original->columns()[variant_pos]; + const TabletColumn& parent_column = *original->columns()[variant_pos]; CHECK(object_column.is_finalized()); std::shared_ptr<vectorized::ColumnObject::Subcolumns::Node> root; // common extracted columns @@ -689,7 +689,9 @@ void rebuild_schema_and_block(const TabletSchemaSPtr& original, vectorized::PathInDataBuilder full_root_path_builder; auto full_root_path = full_root_path_builder.append(parent_column.name_lower_case(), false).build(); - flush_schema->mutable_columns()[variant_pos].set_path_info(full_root_path); + TabletColumn new_col = flush_schema->column(variant_pos); + new_col.set_path_info(full_root_path); + flush_schema->replace_column(variant_pos, new_col); VLOG_DEBUG << "set root_path : " << full_root_path.get_path(); } diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 18b79523fe3..8f2e99a5614 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -95,7 +95,7 @@ static std::string read_columns_to_string(TabletSchemaSPtr tablet_schema, if (it != read_columns.cbegin()) { read_columns_string += ", "; } - read_columns_string += tablet_schema->columns().at(*it).name(); + read_columns_string += tablet_schema->columns().at(*it)->name(); if (i >= col_per_line) { read_columns_string += "\n"; i = 0; @@ -433,7 +433,7 @@ Status NewOlapScanner::_init_variant_columns() { TabletColumn subcol = TabletColumn::create_materialized_variant_column( tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(), slot->column_paths(), slot->col_unique_id()); - if (tablet_schema->field_index(subcol.path_info()) < 0) { + if 
(tablet_schema->field_index(*subcol.path_info_ptr()) < 0) { tablet_schema->append_column(subcol, TabletSchema::ColumnType::VARIANT); } } diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp index ae91b444994..4b3692f4776 100644 --- a/be/src/vec/json/path_in_data.cpp +++ b/be/src/vec/json/path_in_data.cpp @@ -22,6 +22,8 @@ #include <assert.h> +#include <string_view> + #include "vec/common/sip_hash.h" namespace doris::vectorized { @@ -118,12 +120,15 @@ void PathInData::from_protobuf(const segment_v2::ColumnPathInfo& pb) { path = pb.path(); has_nested = pb.has_has_nested(); parts.reserve(pb.path_part_infos().size()); + const char* begin = path.data(); for (const segment_v2::ColumnPathPartInfo& part_info : pb.path_part_infos()) { Part part; part.is_nested = part_info.is_nested(); part.anonymous_array_level = part_info.anonymous_array_level(); - part.key = part_info.key(); + // use string_view to ref data in path + part.key = std::string_view {begin, part_info.key().length()}; parts.push_back(part); + begin += part.key.length() + 1; } } diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h index aba700bb2f7..1367970f10d 100644 --- a/be/src/vec/json/path_in_data.h +++ b/be/src/vec/json/path_in_data.h @@ -23,6 +23,7 @@ #include <stddef.h> #include <algorithm> +#include <memory> #include <string> #include <string_view> #include <vector> @@ -35,6 +36,8 @@ namespace doris::vectorized { /// Class that represents path in document, e.g. JSON. +class PathInData; +using PathInDataPtr = std::shared_ptr<PathInData>; class PathInData { public: struct Part { @@ -100,6 +103,7 @@ private: /// Cached to avoid linear complexity at 'has_nested'. 
bool has_nested = false; }; + class PathInDataBuilder { public: const PathInData::Parts& get_parts() const { return parts; } @@ -126,4 +130,16 @@ struct ParseResult { std::vector<PathInData> paths; std::vector<Field> values; }; + +struct PathInDataRef { + const PathInData* ref; + struct Hash { + size_t operator()(const PathInDataRef& value) const { + return PathInData::Hash {}(*value.ref); + } + }; + PathInDataRef(const PathInData* ptr) : ref(ptr) {} + bool operator==(const PathInDataRef& other) const { return *this->ref == *other.ref; } +}; + } // namespace doris::vectorized diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp index 0251dc12974..006cb6b8f79 100644 --- a/be/src/vec/jsonb/serialize.cpp +++ b/be/src/vec/jsonb/serialize.cpp @@ -55,7 +55,7 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema& schema, const Block& jsonb_writer.writeStartObject(); for (int j = 0; j < num_cols; ++j) { const auto& column = block.get_by_position(j).column; - const auto& tablet_column = schema.columns()[j]; + const auto& tablet_column = *schema.columns()[j]; if (tablet_column.is_row_store_column()) { // ignore dst row store column continue; diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 7213ded16e1..3da1f7c8678 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -57,7 +57,7 @@ OlapBlockDataConvertor::OlapBlockDataConvertor(const TabletSchema* tablet_schema assert(tablet_schema); const auto& columns = tablet_schema->columns(); for (const auto& col : columns) { - _convertors.emplace_back(create_olap_column_data_convertor(col)); + _convertors.emplace_back(create_olap_column_data_convertor(*col)); } } diff --git a/be/test/olap/delete_bitmap_calculator_test.cpp b/be/test/olap/delete_bitmap_calculator_test.cpp index 4941ce39fd6..00856f69f10 100644 --- a/be/test/olap/delete_bitmap_calculator_test.cpp +++ 
b/be/test/olap/delete_bitmap_calculator_test.cpp @@ -50,20 +50,20 @@ static RowsetId rowset_id {0}; using Generator = std::function<void(size_t rid, int cid, RowCursorCell& cell)>; -static TabletColumn create_int_sequence_value(int32_t id, bool is_nullable = true, - bool is_bf_column = false, - bool has_bitmap_index = false) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - column._type = FieldType::OLAP_FIELD_TYPE_INT; - column._is_key = false; - column._is_nullable = is_nullable; - column._length = 4; - column._index_length = 4; - column._is_bf_column = is_bf_column; - column._has_bitmap_index = has_bitmap_index; - column.set_name(SEQUENCE_COL); +static TabletColumnPtr create_int_sequence_value(int32_t id, bool is_nullable = true, + bool is_bf_column = false, + bool has_bitmap_index = false) { + TabletColumnPtr column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); + column->_type = FieldType::OLAP_FIELD_TYPE_INT; + column->_is_key = false; + column->_is_nullable = is_nullable; + column->_length = 4; + column->_index_length = 4; + column->_is_bf_column = is_bf_column; + column->_has_bitmap_index = has_bitmap_index; + column->set_name(SEQUENCE_COL); return column; } @@ -82,12 +82,12 @@ public: EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kSegmentDir).ok()); } - TabletSchemaSPtr create_schema(const std::vector<TabletColumn>& columns, + TabletSchemaSPtr create_schema(const std::vector<TabletColumnPtr>& columns, KeysType keys_type = UNIQUE_KEYS) { TabletSchemaSPtr res = std::make_shared<TabletSchema>(); for (auto& col : columns) { - res->append_column(col); + res->append_column(*col); } res->_keys_type = keys_type; return res; @@ -144,7 +144,7 @@ public: size_t const num_columns = num_key_columns + has_sequence_col + num_value_columns; size_t const seq_col_idx = has_sequence_col ? 
num_key_columns : -1; - std::vector<TabletColumn> columns; + std::vector<TabletColumnPtr> columns; for (int i = 0; i < num_key_columns; ++i) { columns.emplace_back(create_int_key(i)); diff --git a/be/test/olap/memtable_flush_executor_test.cpp b/be/test/olap/memtable_flush_executor_test.cpp index 23d142ed7e0..687511a47d1 100644 --- a/be/test/olap/memtable_flush_executor_test.cpp +++ b/be/test/olap/memtable_flush_executor_test.cpp @@ -69,13 +69,16 @@ void tear_down() { } Schema create_schema() { - std::vector<TabletColumn> col_schemas; - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, - FieldType::OLAP_FIELD_TYPE_SMALLINT, true); - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, - FieldType::OLAP_FIELD_TYPE_INT, true); - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, - FieldType::OLAP_FIELD_TYPE_BIGINT, true); + std::vector<TabletColumnPtr> col_schemas; + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_SMALLINT, true)); + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT, true)); + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, + FieldType::OLAP_FIELD_TYPE_BIGINT, true)); Schema schema(col_schemas, 2); return schema; } diff --git a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp index f4dcb69b048..bf1cc69e900 100644 --- a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp @@ -156,8 +156,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) { std::string filename = kTestDir + "/NormalTestIntPage"; auto fs = io::global_local_filesystem(); - TabletColumn int_column = create_int_key(0); - Field* field = 
FieldFactory::create(int_column); + TabletColumnPtr int_column = create_int_key(0); + Field* field = FieldFactory::create(*int_column); std::unique_ptr<ZoneMapIndexWriter> builder(nullptr); static_cast<void>(ZoneMapIndexWriter::create(field, builder)); @@ -210,25 +210,25 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) { // Test for string TEST_F(ColumnZoneMapTest, NormalTestVarcharPage) { - TabletColumn varchar_column = create_varchar_key(0); - Field* field = FieldFactory::create(varchar_column); + TabletColumnPtr varchar_column = create_varchar_key(0); + Field* field = FieldFactory::create(*varchar_column); test_string("NormalTestVarcharPage", field); delete field; } // Test for string TEST_F(ColumnZoneMapTest, NormalTestCharPage) { - TabletColumn char_column = create_char_key(0); - Field* field = FieldFactory::create(char_column); + TabletColumnPtr char_column = create_char_key(0); + Field* field = FieldFactory::create(*char_column); test_string("NormalTestCharPage", field); delete field; } // Test for zone map limit TEST_F(ColumnZoneMapTest, ZoneMapCut) { - TabletColumn varchar_column = create_varchar_key(0); - varchar_column.set_index_length(1024); - Field* field = FieldFactory::create(varchar_column); + TabletColumnPtr varchar_column = create_varchar_key(0); + varchar_column->set_index_length(1024); + Field* field = FieldFactory::create(*varchar_column); test_string("ZoneMapCut", field); delete field; } diff --git a/be/test/olap/tablet_schema_helper.cpp b/be/test/olap/tablet_schema_helper.cpp index f5ff2295357..da6720b384a 100644 --- a/be/test/olap/tablet_schema_helper.cpp +++ b/be/test/olap/tablet_schema_helper.cpp @@ -19,80 +19,81 @@ #include <string.h> +#include "olap/tablet_schema.h" #include "util/slice.h" #include "vec/common/arena.h" namespace doris { -TabletColumn create_int_key(int32_t id, bool is_nullable, bool is_bf_column, - bool has_bitmap_index) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - 
column._type = FieldType::OLAP_FIELD_TYPE_INT; - column._is_key = true; - column._is_nullable = is_nullable; - column._length = 4; - column._index_length = 4; - column._is_bf_column = is_bf_column; - column._has_bitmap_index = has_bitmap_index; +TabletColumnPtr create_int_key(int32_t id, bool is_nullable, bool is_bf_column, + bool has_bitmap_index) { + auto column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); + column->_type = FieldType::OLAP_FIELD_TYPE_INT; + column->_is_key = true; + column->_is_nullable = is_nullable; + column->_length = 4; + column->_index_length = 4; + column->_is_bf_column = is_bf_column; + column->_has_bitmap_index = has_bitmap_index; return column; } -TabletColumn create_int_value(int32_t id, FieldAggregationMethod agg_method, bool is_nullable, - const std::string default_value, bool is_bf_column, - bool has_bitmap_index) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - column._type = FieldType::OLAP_FIELD_TYPE_INT; - column._is_key = false; - column._aggregation = agg_method; - column._is_nullable = is_nullable; - column._length = 4; - column._index_length = 4; +TabletColumnPtr create_int_value(int32_t id, FieldAggregationMethod agg_method, bool is_nullable, + const std::string default_value, bool is_bf_column, + bool has_bitmap_index) { + auto column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); + column->_type = FieldType::OLAP_FIELD_TYPE_INT; + column->_is_key = false; + column->_aggregation = agg_method; + column->_is_nullable = is_nullable; + column->_length = 4; + column->_index_length = 4; if (default_value != "") { - column._has_default_value = true; - column._default_value = default_value; + column->_has_default_value = true; + column->_default_value = default_value; } - column._is_bf_column = is_bf_column; - column._has_bitmap_index = has_bitmap_index; + 
column->_is_bf_column = is_bf_column; + column->_has_bitmap_index = has_bitmap_index; return column; } -TabletColumn create_char_key(int32_t id, bool is_nullable) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - column._type = FieldType::OLAP_FIELD_TYPE_CHAR; - column._is_key = true; - column._is_nullable = is_nullable; - column._length = 8; - column._index_length = 1; +TabletColumnPtr create_char_key(int32_t id, bool is_nullable) { + auto column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); + column->_type = FieldType::OLAP_FIELD_TYPE_CHAR; + column->_is_key = true; + column->_is_nullable = is_nullable; + column->_length = 8; + column->_index_length = 1; return column; } -TabletColumn create_varchar_key(int32_t id, bool is_nullable) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - column._type = FieldType::OLAP_FIELD_TYPE_VARCHAR; - column._is_key = true; - column._is_nullable = is_nullable; - column._length = 65533; - column._index_length = 4; +TabletColumnPtr create_varchar_key(int32_t id, bool is_nullable) { + auto column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); + column->_type = FieldType::OLAP_FIELD_TYPE_VARCHAR; + column->_is_key = true; + column->_is_nullable = is_nullable; + column->_length = 65533; + column->_index_length = 4; return column; } -TabletColumn create_string_key(int32_t id, bool is_nullable) { - TabletColumn column; - column._unique_id = id; - column._col_name = std::to_string(id); - column._type = FieldType::OLAP_FIELD_TYPE_STRING; - column._is_key = true; - column._is_nullable = is_nullable; - column._length = 2147483643; - column._index_length = 4; +TabletColumnPtr create_string_key(int32_t id, bool is_nullable) { + auto column = std::make_shared<TabletColumn>(); + column->_unique_id = id; + column->_col_name = std::to_string(id); 
+ column->_type = FieldType::OLAP_FIELD_TYPE_STRING; + column->_is_key = true; + column->_is_nullable = is_nullable; + column->_length = 2147483643; + column->_index_length = 4; return column; } diff --git a/be/test/olap/tablet_schema_helper.h b/be/test/olap/tablet_schema_helper.h index 547882a18cb..1e2a7ba7e79 100644 --- a/be/test/olap/tablet_schema_helper.h +++ b/be/test/olap/tablet_schema_helper.h @@ -20,6 +20,7 @@ #include <stddef.h> #include <stdint.h> +#include <memory> #include <string> #include "olap/olap_common.h" @@ -30,30 +31,30 @@ namespace vectorized { class Arena; } // namespace vectorized -TabletColumn create_int_key(int32_t id, bool is_nullable = true, bool is_bf_column = false, - bool has_bitmap_index = false); +TabletColumnPtr create_int_key(int32_t id, bool is_nullable = true, bool is_bf_column = false, + bool has_bitmap_index = false); -TabletColumn create_int_value( +TabletColumnPtr create_int_value( int32_t id, FieldAggregationMethod agg_method = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, bool is_nullable = true, const std::string default_value = "", bool is_bf_column = false, bool has_bitmap_index = false); -TabletColumn create_char_key(int32_t id, bool is_nullable = true); +TabletColumnPtr create_char_key(int32_t id, bool is_nullable = true); -TabletColumn create_varchar_key(int32_t id, bool is_nullable = true); +TabletColumnPtr create_varchar_key(int32_t id, bool is_nullable = true); -TabletColumn create_string_key(int32_t id, bool is_nullable = true); +TabletColumnPtr create_string_key(int32_t id, bool is_nullable = true); template <FieldType type> -TabletColumn create_with_default_value(std::string default_value) { - TabletColumn column; - column._type = type; - column._is_nullable = true; - column._aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE; - column._has_default_value = true; - column._default_value = default_value; - column._length = 4; +TabletColumnPtr create_with_default_value(std::string 
default_value) { + auto column = std::make_shared<TabletColumn>(); + column->_type = type; + column->_is_nullable = true; + column->_aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE; + column->_has_default_value = true; + column->_default_value = default_value; + column->_length = 4; return column; } diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp index 687fb3f36be..dac297448f1 100644 --- a/be/test/vec/exec/vgeneric_iterators_test.cpp +++ b/be/test/vec/exec/vgeneric_iterators_test.cpp @@ -45,15 +45,18 @@ public: }; Schema create_schema() { - std::vector<TabletColumn> col_schemas; - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, - FieldType::OLAP_FIELD_TYPE_SMALLINT, true); + std::vector<TabletColumnPtr> col_schemas; + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_SMALLINT, true)); // c2: int - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, - FieldType::OLAP_FIELD_TYPE_INT, true); + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT, true)); // c3: big int - col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, - FieldType::OLAP_FIELD_TYPE_BIGINT, true); + col_schemas.emplace_back( + std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, + FieldType::OLAP_FIELD_TYPE_BIGINT, true)); Schema schema(col_schemas, 2); return schema; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org