This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 62a6360a988 [Optimize](Variant) optimize schema update performance 
(#45480)
62a6360a988 is described below

commit 62a6360a9881743a501d7e5a74063abebadc14a8
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Fri Dec 20 16:33:15 2024 +0800

    [Optimize](Variant) optimize schema update performance (#45480)
    
    When updating schemas under high concurrency, the cost of updating
    schemas is expensive.
    1. update the schema only when the row count is not 0
    2. copy_from is expensive, use the copy constructor instead
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp |  4 +++-
 be/src/olap/rowset_builder.cpp                   | 24 +++++++++++++-----------
 be/src/olap/tablet_schema.cpp                    | 15 +++++++++++++++
 be/src/olap/tablet_schema.h                      |  3 +++
 be/src/vec/common/schema_util.cpp                |  5 ++---
 5 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index fe465f98a2a..2457a44de39 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -363,7 +363,9 @@ Status 
SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& da
             continue;
         }
         if (_flush_schema == nullptr) {
-            _flush_schema = std::make_shared<TabletSchema>(*_tablet_schema);
+            _flush_schema = std::make_shared<TabletSchema>();
+            // deep copy
+            _flush_schema->copy_from(*_tablet_schema);
         }
         auto column_ref = data.get_by_position(i).column;
         const vectorized::ColumnObject& object_column = 
assert_cast<vectorized::ColumnObject&>(
diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index ec7463d5b9d..ccc006e1f04 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -346,21 +346,22 @@ Status RowsetBuilder::commit_txn() {
     SCOPED_TIMER(_commit_txn_timer);
 
     const RowsetWriterContext& rw_ctx = _rowset_writer->context();
-    if (rw_ctx.tablet_schema->num_variant_columns() > 0) {
+    if (rw_ctx.tablet_schema->num_variant_columns() > 0 && _rowset->num_rows() 
> 0) {
         // Need to merge schema with `rw_ctx.merged_tablet_schema` in prior,
         // merged schema keeps the newest merged schema for the rowset, which 
is updated and merged
         // during flushing segments.
         if (rw_ctx.merged_tablet_schema != nullptr) {
             
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.merged_tablet_schema));
+        } else {
+            // We should merge rowset schema further, in case that the 
merged_tablet_schema maybe null
+            // when enable_memtable_on_sink_node is true, the 
merged_tablet_schema will not be passed to
+            // the destination backend.
+            // update tablet schema when meet variant columns, before 
commit_txn
+            // Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
+            // _tabelt->tablet_schema:  A(bigint), B(double)
+            //  => update_schema:       A(bigint), B(double), C(int), D(int)
+            
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
         }
-        // We should merge rowset schema further, in case that the 
merged_tablet_schema maybe null
-        // when enable_memtable_on_sink_node is true, the merged_tablet_schema 
will not be passed to
-        // the destination backend.
-        // update tablet schema when meet variant columns, before commit_txn
-        // Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
-        // _tabelt->tablet_schema:  A(bigint), B(double)
-        //  => update_schema:       A(bigint), B(double), C(int), D(int)
-        
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
     }
 
     // Transfer ownership of `PendingRowsetGuard` to `TxnManager`
@@ -398,7 +399,6 @@ Status BaseRowsetBuilder::cancel() {
 void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
                                                      const 
OlapTableSchemaParam* table_schema_param,
                                                      const TabletSchema& 
ori_tablet_schema) {
-    _tablet_schema->copy_from(ori_tablet_schema);
     // find the right index id
     int i = 0;
     auto indexes = table_schema_param->indexes();
@@ -407,11 +407,13 @@ void 
BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
             break;
         }
     }
-
     if (!indexes.empty() && !indexes[i]->columns.empty() &&
         indexes[i]->columns[0]->unique_id() >= 0) {
+        _tablet_schema->shawdow_copy_without_columns(ori_tablet_schema);
         _tablet_schema->build_current_tablet_schema(index_id, 
table_schema_param->version(),
                                                     indexes[i], 
ori_tablet_schema);
+    } else {
+        _tablet_schema->copy_from(ori_tablet_schema);
     }
     if (_tablet_schema->schema_version() > ori_tablet_schema.schema_version()) 
{
         // After schema change, should include extracted column
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 3ec5d221664..7b6b5f313c1 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1064,6 +1064,21 @@ void TabletSchema::copy_from(const TabletSchema& 
tablet_schema) {
     _table_id = tablet_schema.table_id();
 }
 
+void TabletSchema::shawdow_copy_without_columns(const TabletSchema& 
tablet_schema) { // copy tablet_schema into *this, then empty the column state
+    *this = tablet_schema; // copy-assign from the source; column state is reset below
+    _field_path_to_index.clear(); // clear the three field-to-index maps
+    _field_name_to_index.clear();
+    _field_id_to_index.clear();
+    _num_columns = 0; // zero the column counters to match the emptied _cols
+    _num_variant_columns = 0;
+    _num_null_columns = 0;
+    _num_key_columns = 0;
+    _cols.clear();
+    _vl_field_mem_size = 0;
+    // notice: do not reference the source schema's columns
+    _column_cache_handlers.clear();
+}
+
 void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
     for (auto& col : _cols) {
         if (col->unique_id() < 0) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index c813d6f0ef8..3dfe055fbf4 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -330,6 +330,8 @@ public:
     // Must make sure the row column is always the last column
     void add_row_column();
     void copy_from(const TabletSchema& tablet_schema);
+    // lightweight copy, take care of lifecycle of TabletColumn
+    void shawdow_copy_without_columns(const TabletSchema& tablet_schema);
     void update_index_info_from(const TabletSchema& tablet_schema);
     std::string to_key() const;
     // get_metadata_size is only the memory of the TabletSchema itself, not 
include child objects.
@@ -531,6 +533,7 @@ public:
 private:
     friend bool operator==(const TabletSchema& a, const TabletSchema& b);
     friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
+    TabletSchema(const TabletSchema&) = default;
 
     void clear_column_cache_handlers();
 
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 2b1c71c643d..2b53fc24702 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -415,9 +415,8 @@ Status get_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
     // duplicated paths following the update_least_common_schema process.
     auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& 
base_schema) {
         output_schema = std::make_shared<TabletSchema>();
-        output_schema->copy_from(*base_schema);
-        // Merge columns from other schemas
-        output_schema->clear_columns();
+        // not copy columns but only shadow copy other attributes
+        output_schema->shawdow_copy_without_columns(*base_schema);
         // Get all columns without extracted columns and collect variant col 
unique id
         for (const TabletColumnPtr& col : base_schema->columns()) {
             if (col->is_variant_type()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to