This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new e1f5c677fc0 [improve](performance) replace 
serialized_sparse_column->insert_many_defaults to 
serialized_sparse_column->resize (#49952)
e1f5c677fc0 is described below

commit e1f5c677fc0eb3fc7788f4941349c4e3cc6270b8
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Fri Apr 11 14:24:34 2025 +0800

    [improve](performance) replace 
serialized_sparse_column->insert_many_defaults to 
serialized_sparse_column->resize (#49952)
    
    For the map type, `insert_many_defaults` calls `insert_default` once per row,
    so `resize` is used instead to avoid that per-row overhead.
---
 .../rowset/segment_v2/hierarchical_data_reader.cpp     | 15 +++++++++------
 be/src/vec/columns/column_object.cpp                   | 18 ++++++------------
 be/src/vec/columns/column_object.h                     |  6 ++++++
 be/src/vec/data_types/data_type_object.cpp             |  3 ++-
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 185a6d82422..f0af8f77894 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -318,19 +318,23 @@ Status 
HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject&
     using namespace vectorized;
     container_variant.clear_sparse_column();
     if (!_sparse_column_reader) {
-        
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+        container_variant.get_sparse_column()->assume_mutable()->resize(
+                container_variant.get_sparse_column()->size() + nrows);
+        ENABLE_CHECK_CONSISTENCY(&container_variant);
         return Status::OK();
     }
     // process sparse column
     if (_path.get_parts().empty()) {
         // directly use sparse column if access root
         
container_variant.set_sparse_column(_sparse_column_reader->column->get_ptr());
+        ENABLE_CHECK_CONSISTENCY(&container_variant);
     } else {
         const auto& offsets =
                 assert_cast<const 
ColumnMap&>(*_sparse_column_reader->column).get_offsets();
         /// Check if there is no data in shared data in current range.
         if (offsets.back() == offsets[-1]) {
-            
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+            container_variant.get_sparse_column()->assume_mutable()->resize(
+                    container_variant.get_sparse_column()->size() + nrows);
         } else {
             // Read for variant sparse column
             // Example path: a.b
@@ -402,6 +406,7 @@ Status 
HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject&
             }
         }
     }
+    ENABLE_CHECK_CONSISTENCY(&container_variant);
     return Status::OK();
 }
 
@@ -474,10 +479,8 @@ void 
SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr&
             *var.get_subcolumn({}) /*root*/, null_map, StringRef 
{_path.data(), _path.size()},
             _sparse_column->get_ptr(), 0, _sparse_column->size());
     var.incr_num_rows(_sparse_column->size());
-    
var.get_sparse_column()->assume_mutable()->insert_many_defaults(_sparse_column->size());
-#ifndef NDEBUG
-    var.check_consistency();
-#endif
+    var.get_sparse_column()->assume_mutable()->resize(var.rows());
+    ENABLE_CHECK_CONSISTENCY(&var);
     // _sparse_column->clear();
 }
 
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 1b607a7e87e..fd71c0ff967 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -139,12 +139,6 @@ size_t get_number_of_dimensions(const IDataType& type) {
 }
 } // namespace
 
-#ifdef NDEBUG
-#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
-#else
-#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
-#endif
-
 // current nested level is 2, inside column object
 constexpr int CURRENT_SERIALIZE_NESTING_LEVEL = 2;
 
@@ -662,7 +656,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count, 
DataTypePtr root_type,
           _max_subcolumns_count(max_subcolumns_count) {
     subcolumns.create_root(
             Subcolumn(std::move(root_column), root_type, is_nullable, true 
/*root*/));
-    serialized_sparse_column->insert_many_defaults(num_rows);
+    serialized_sparse_column->resize(num_rows);
     ENABLE_CHECK_CONSISTENCY(this);
 }
 
@@ -677,7 +671,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count, 
Subcolumns&& subcolumns
                                "subcolumns count: {}",
                                max_subcolumns_count, subcolumns_.size());
     }
-    serialized_sparse_column->insert_many_defaults(num_rows);
+    serialized_sparse_column->resize(num_rows);
 }
 
 ColumnObject::ColumnObject(int32_t max_subcolumns_count, size_t size)
@@ -822,7 +816,7 @@ void ColumnObject::insert_many_defaults(size_t length) {
     for (auto& entry : subcolumns) {
         entry->data.insert_many_defaults(length);
     }
-    serialized_sparse_column->insert_many_defaults(length);
+    serialized_sparse_column->resize(num_rows + length);
     num_rows += length;
     ENABLE_CHECK_CONSISTENCY(this);
 }
@@ -1189,7 +1183,7 @@ void 
ColumnObject::insert_from_sparse_column_and_fill_remaing_dense_column(
 
         /// If no src subcolumns should be inserted into sparse column, insert 
defaults.
         if (sorted_src_subcolumn_for_sparse_column.empty()) {
-            serialized_sparse_column->insert_many_defaults(length);
+            serialized_sparse_column->resize(num_rows + length);
         } else {
             // Otherwise insert required src dense columns into sparse column.
             auto [sparse_column_keys, sparse_column_values] = 
get_sparse_data_paths_and_values();
@@ -1757,7 +1751,7 @@ Status ColumnObject::serialize_sparse_columns(
     CHECK(is_finalized());
 
     if (remaing_subcolumns.empty()) {
-        serialized_sparse_column->insert_many_defaults(num_rows);
+        serialized_sparse_column->resize(num_rows);
         return Status::OK();
     }
     serialized_sparse_column->reserve(num_rows);
@@ -2052,7 +2046,7 @@ void ColumnObject::create_root(const DataTypePtr& type, 
MutableColumnPtr&& colum
     }
     add_sub_column({}, std::move(column), type);
     if (serialized_sparse_column->empty()) {
-        serialized_sparse_column->insert_many_defaults(num_rows);
+        serialized_sparse_column->resize(num_rows);
     }
     ENABLE_CHECK_CONSISTENCY(this);
 }
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 8a76151bba6..2747cbc89d7 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -84,6 +84,12 @@ struct FieldInfo {
     int precision = 0;
 };
 
+#ifdef NDEBUG
+#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
+#else
+#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
+#endif
+
 /** A column that represents object with dynamic set of subcolumns.
  *  Subcolumns are identified by paths in document and are stored in
  *  a trie-like structure. ColumnObject is not suitable for writing into tables
diff --git a/be/src/vec/data_types/data_type_object.cpp 
b/be/src/vec/data_types/data_type_object.cpp
index 0ea10460cf5..457d66adc64 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -207,7 +207,8 @@ const char* DataTypeObject::deserialize(const char* buf, 
MutableColumnPtr* colum
                                                                   
be_exec_version);
         column_object->set_sparse_column(std::move(sparse_column));
     } else {
-        
column_object->get_sparse_column()->assume_mutable()->insert_many_defaults(num_rows);
+        column_object->get_sparse_column()->assume_mutable()->resize(
+                column_object->get_sparse_column()->size() + num_rows);
     }
 
     if (!root_added && column_object->get_subcolumn({})) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to