This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new e1f5c677fc0 [improve](performance) replace serialized_sparse_column->insert_many_defaults to serialized_sparse_column->resize (#49952) e1f5c677fc0 is described below commit e1f5c677fc0eb3fc7788f4941349c4e3cc6270b8 Author: lihangyu <lihan...@selectdb.com> AuthorDate: Fri Apr 11 14:24:34 2025 +0800 [improve](performance) replace serialized_sparse_column->insert_many_defaults to serialized_sparse_column->resize (#49952) for map type `insert_many_defaults` will do insert_default one by one --- .../rowset/segment_v2/hierarchical_data_reader.cpp | 15 +++++++++------ be/src/vec/columns/column_object.cpp | 18 ++++++------------ be/src/vec/columns/column_object.h | 6 ++++++ be/src/vec/data_types/data_type_object.cpp | 3 ++- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index 185a6d82422..f0af8f77894 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -318,19 +318,23 @@ Status HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject& using namespace vectorized; container_variant.clear_sparse_column(); if (!_sparse_column_reader) { - container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows); + container_variant.get_sparse_column()->assume_mutable()->resize( + container_variant.get_sparse_column()->size() + nrows); + ENABLE_CHECK_CONSISTENCY(&container_variant); return Status::OK(); } // process sparse column if (_path.get_parts().empty()) { // directly use sparse column if access root container_variant.set_sparse_column(_sparse_column_reader->column->get_ptr()); + ENABLE_CHECK_CONSISTENCY(&container_variant); } else { const auto& offsets = assert_cast<const ColumnMap&>(*_sparse_column_reader->column).get_offsets(); /// Check if there is no data in shared data in current range. if (offsets.back() == offsets[-1]) { - container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows); + container_variant.get_sparse_column()->assume_mutable()->resize( + container_variant.get_sparse_column()->size() + nrows); } else { // Read for variant sparse column // Example path: a.b @@ -402,6 +406,7 @@ Status HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject& } } } + ENABLE_CHECK_CONSISTENCY(&container_variant); return Status::OK(); } @@ -474,10 +479,8 @@ void SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr& *var.get_subcolumn({}) /*root*/, null_map, StringRef {_path.data(), _path.size()}, _sparse_column->get_ptr(), 0, _sparse_column->size()); var.incr_num_rows(_sparse_column->size()); - var.get_sparse_column()->assume_mutable()->insert_many_defaults(_sparse_column->size()); -#ifndef NDEBUG - var.check_consistency(); -#endif + var.get_sparse_column()->assume_mutable()->resize(var.rows()); + ENABLE_CHECK_CONSISTENCY(&var); // _sparse_column->clear(); } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 1b607a7e87e..fd71c0ff967 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -139,12 +139,6 @@ size_t get_number_of_dimensions(const IDataType& type) { } } // namespace -#ifdef NDEBUG -#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */ -#else -#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency() -#endif - // current nested level is 2, inside column object constexpr int CURRENT_SERIALIZE_NESTING_LEVEL = 2; @@ -662,7 +656,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count, DataTypePtr root_type, _max_subcolumns_count(max_subcolumns_count) { subcolumns.create_root( Subcolumn(std::move(root_column), root_type, is_nullable, true /*root*/)); - serialized_sparse_column->insert_many_defaults(num_rows); + serialized_sparse_column->resize(num_rows); ENABLE_CHECK_CONSISTENCY(this); } @@ -677,7 +671,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count, Subcolumns&& subcolumns "subcolumns count: {}", max_subcolumns_count, subcolumns_.size()); } - serialized_sparse_column->insert_many_defaults(num_rows); + serialized_sparse_column->resize(num_rows); } ColumnObject::ColumnObject(int32_t max_subcolumns_count, size_t size) @@ -822,7 +816,7 @@ void ColumnObject::insert_many_defaults(size_t length) { for (auto& entry : subcolumns) { entry->data.insert_many_defaults(length); } - serialized_sparse_column->insert_many_defaults(length); + serialized_sparse_column->resize(num_rows + length); num_rows += length; ENABLE_CHECK_CONSISTENCY(this); } @@ -1189,7 +1183,7 @@ void ColumnObject::insert_from_sparse_column_and_fill_remaing_dense_column( /// If no src subcolumns should be inserted into sparse column, insert defaults. if (sorted_src_subcolumn_for_sparse_column.empty()) { - serialized_sparse_column->insert_many_defaults(length); + serialized_sparse_column->resize(num_rows + length); } else { // Otherwise insert required src dense columns into sparse column. auto [sparse_column_keys, sparse_column_values] = get_sparse_data_paths_and_values(); @@ -1757,7 +1751,7 @@ Status ColumnObject::serialize_sparse_columns( CHECK(is_finalized()); if (remaing_subcolumns.empty()) { - serialized_sparse_column->insert_many_defaults(num_rows); + serialized_sparse_column->resize(num_rows); return Status::OK(); } serialized_sparse_column->reserve(num_rows); @@ -2052,7 +2046,7 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum } add_sub_column({}, std::move(column), type); if (serialized_sparse_column->empty()) { - serialized_sparse_column->insert_many_defaults(num_rows); + serialized_sparse_column->resize(num_rows); } ENABLE_CHECK_CONSISTENCY(this); } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 8a76151bba6..2747cbc89d7 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -84,6 +84,12 @@ struct FieldInfo { int precision = 0; }; +#ifdef NDEBUG +#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */ +#else +#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency() +#endif + /** A column that represents object with dynamic set of subcolumns. * Subcolumns are identified by paths in document and are stored in * a trie-like structure. ColumnObject is not suitable for writing into tables diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index 0ea10460cf5..457d66adc64 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -207,7 +207,8 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum be_exec_version); column_object->set_sparse_column(std::move(sparse_column)); } else { - column_object->get_sparse_column()->assume_mutable()->insert_many_defaults(num_rows); + column_object->get_sparse_column()->assume_mutable()->resize( + column_object->get_sparse_column()->size() + num_rows); } if (!root_added && column_object->get_subcolumn({})) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org