This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6cc2d4a3e5c [Improve](TabletSchema) optimize finding TabletIndex 
performance in TabletSchema (#48699)
6cc2d4a3e5c is described below

commit 6cc2d4a3e5c08520584ddf11e10defede4b82736
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Thu Mar 27 21:32:29 2025 +0800

    [Improve](TabletSchema) optimize finding TabletIndex performance in 
TabletSchema (#48699)
    
    Add an `unordered_map` to accelerate TabletIndex search speed
    
    
    
![image](https://github.com/user-attachments/assets/ff139fdc-c279-4629-a48b-3d6a9cf4d6f5)
---
 be/src/olap/tablet_schema.cpp             |  91 +++++++++-------
 be/src/olap/tablet_schema.h               |  17 +++
 be/test/olap/tablet_schema_index_test.cpp | 174 ++++++++++++++++++++++++++++++
 3 files changed, 244 insertions(+), 38 deletions(-)

diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index a2c4f29ce0d..2ffc4a2d1a3 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -905,6 +905,10 @@ void TabletColumn::append_sparse_column(TabletColumn 
column) {
 }
 
 void TabletSchema::append_index(TabletIndex&& index) {
+    for (int32_t id : index.col_unique_ids()) {
+        _col_id_suffix_to_index.emplace(
+                std::make_tuple(index.index_type(), id, 
index.get_index_suffix()), _indexes.size());
+    }
     _indexes.push_back(std::make_shared<TabletIndex>(index));
 }
 
@@ -912,15 +916,14 @@ void TabletSchema::update_index(const TabletColumn& col, 
const IndexType& index_
                                 TabletIndex&& index) {
     int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() 
: col.unique_id();
     const std::string& suffix_path = escape_for_path_name(col.suffix_path());
-    for (auto& _indexe : _indexes) {
-        for (int32_t id : _indexe->col_unique_ids()) {
-            if (_indexe->index_type() == index_type && id == col_unique_id &&
-                _indexe->get_index_suffix() == suffix_path) {
-                _indexe = std::make_shared<TabletIndex>(std::move(index));
-                break;
-            }
-        }
+    IndexKey key(index_type, col_unique_id, suffix_path);
+    auto iter = _col_id_suffix_to_index.find(key);
+    if (iter != _col_id_suffix_to_index.end()) {
+        _indexes[iter->second] = 
std::make_shared<TabletIndex>(std::move(index));
+        return;
     }
+    LOG(WARNING) << " failed to update_index: " << index_type << " " << 
col_unique_id << " "
+                 << suffix_path;
 }
 
 void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
@@ -938,17 +941,25 @@ void TabletSchema::clear_index_cache_handlers() {
 void TabletSchema::clear_index() {
     clear_index_cache_handlers();
     _indexes.clear();
+    _col_id_suffix_to_index.clear();
 }
 
 void TabletSchema::remove_index(int64_t index_id) {
     std::vector<TabletIndexPtr> indexes;
+    std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
     for (auto index : _indexes) {
         if (index->index_id() == index_id) {
             continue;
         }
+        for (int32_t col_uid : index->col_unique_ids()) {
+            col_id_suffix_to_index.emplace(
+                    std::make_tuple(index->index_type(), col_uid, 
index->get_index_suffix()),
+                    indexes.size());
+        }
         indexes.emplace_back(std::move(index));
     }
     _indexes = std::move(indexes);
+    _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
 }
 
 void TabletSchema::clear_columns() {
@@ -979,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& 
schema, bool ignore_extrac
     _num_null_columns = 0;
     _cols.clear();
     _indexes.clear();
+    _col_id_suffix_to_index.clear();
     _field_name_to_index.clear();
     _field_id_to_index.clear();
     _cluster_key_uids.clear();
@@ -1032,6 +1044,11 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& 
schema, bool ignore_extrac
             index = std::make_shared<TabletIndex>();
             index->init_from_pb(index_pb);
         }
+        for (int32_t col_uid : index->col_unique_ids()) {
+            _col_id_suffix_to_index.emplace(
+                    std::make_tuple(index->index_type(), col_uid, 
index->get_index_suffix()),
+                    _indexes.size());
+        }
         _indexes.emplace_back(std::move(index));
     }
     _num_short_key_columns = schema.num_short_key_columns();
@@ -1150,6 +1167,7 @@ void TabletSchema::build_current_tablet_schema(int64_t 
index_id, int32_t version
     bool has_bf_columns = false;
     _cols.clear();
     _indexes.clear();
+    _col_id_suffix_to_index.clear();
     _field_name_to_index.clear();
     _field_id_to_index.clear();
     _delete_sign_idx = -1;
@@ -1189,7 +1207,12 @@ void TabletSchema::build_current_tablet_schema(int64_t 
index_id, int32_t version
         _num_columns++;
     }
 
-    for (auto& i : index->indexes) {
+    for (const auto& i : index->indexes) {
+        for (int32_t col_uid : i->col_unique_ids()) {
+            _col_id_suffix_to_index.emplace(
+                    std::make_tuple(i->index_type(), col_uid, 
i->get_index_suffix()),
+                    _indexes.size());
+        }
         _indexes.emplace_back(std::make_shared<TabletIndex>(*i));
     }
 
@@ -1371,6 +1394,15 @@ void TabletSchema::update_indexes_from_thrift(const 
std::vector<doris::TOlapTabl
         indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index)));
     }
     _indexes = std::move(indexes);
+    std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
+    for (size_t i = 0; i < _indexes.size(); i++) {
+        for (int32_t col_uid : _indexes[i]->col_unique_ids()) {
+            
col_id_suffix_to_index.emplace(std::make_tuple(_indexes[i]->index_type(), 
col_uid,
+                                                           
_indexes[i]->get_index_suffix()),
+                                           i);
+        }
+    }
+    _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
 }
 
 bool TabletSchema::exist_column(const std::string& field_name) const {
@@ -1426,14 +1458,10 @@ bool 
TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
 const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
                                                 const std::string& 
suffix_path) const {
     const std::string escaped_suffix = escape_for_path_name(suffix_path);
-    for (const auto& _index : _indexes) {
-        if (_index->index_type() == IndexType::INVERTED) {
-            for (int32_t id : _index->col_unique_ids()) {
-                if (id == col_unique_id && _index->get_index_suffix() == 
escaped_suffix) {
-                    return _index.get();
-                }
-            }
-        }
+    auto it = _col_id_suffix_to_index.find(
+            std::make_tuple(IndexType::INVERTED, col_unique_id, 
escaped_suffix));
+    if (it != _col_id_suffix_to_index.end()) {
+        return _indexes[it->second].get();
     }
     return nullptr;
 }
@@ -1450,30 +1478,17 @@ const TabletIndex* TabletSchema::inverted_index(const 
TabletColumn& col) const {
 }
 
 bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
-    // TODO use more efficient impl
-    for (const auto& _index : _indexes) {
-        if (_index->index_type() == IndexType::NGRAM_BF) {
-            for (int32_t id : _index->col_unique_ids()) {
-                if (id == col_unique_id) {
-                    return true;
-                }
-            }
-        }
-    }
-
-    return false;
+    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
+    auto it = _col_id_suffix_to_index.find(index_key);
+    return it != _col_id_suffix_to_index.end();
 }
 
 const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) 
const {
-    // TODO use more efficient impl
-    for (const auto& _index : _indexes) {
-        if (_index->index_type() == IndexType::NGRAM_BF) {
-            for (int32_t id : _index->col_unique_ids()) {
-                if (id == col_unique_id) {
-                    return _index.get();
-                }
-            }
-        }
+    // Get the ngram bf index for the given column unique id
+    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
+    auto it = _col_id_suffix_to_index.find(index_key);
+    if (it != _col_id_suffix_to_index.end()) {
+        return _indexes[it->second].get();
     }
     return nullptr;
 }
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index cdc91981d24..8d0f3f111de 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -560,6 +560,23 @@ private:
     std::unordered_map<int32_t, int32_t> _field_id_to_index;
     std::unordered_map<vectorized::PathInDataRef, int32_t, 
vectorized::PathInDataRef::Hash>
             _field_path_to_index;
+
+    // index_type/col_unique_id/suffix -> idx in _indexes
+    using IndexKey = std::tuple<IndexType, int32_t, std::string>;
+    struct IndexKeyHash {
+        size_t operator()(const IndexKey& t) const {
+            std::size_t seed = 0;
+            seed = doris::HashUtil::hash((const char*)&std::get<0>(t), 
sizeof(std::get<0>(t)),
+                                         seed);
+            seed = doris::HashUtil::hash((const char*)&std::get<1>(t), 
sizeof(std::get<1>(t)),
+                                         seed);
+            seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(), 
std::get<2>(t).size(),
+                                         seed);
+            return seed;
+        }
+    };
+    std::unordered_map<IndexKey, int32_t, IndexKeyHash> 
_col_id_suffix_to_index;
+
     size_t _num_columns = 0;
     size_t _num_variant_columns = 0;
     size_t _num_key_columns = 0;
diff --git a/be/test/olap/tablet_schema_index_test.cpp 
b/be/test/olap/tablet_schema_index_test.cpp
new file mode 100644
index 00000000000..b35bbdb2a47
--- /dev/null
+++ b/be/test/olap/tablet_schema_index_test.cpp
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema.h"
+
+namespace doris {
+
+class TabletSchemaIndexTest : public testing::Test {
+protected:
+    void SetUp() override {
+        // Setup common test data
+        _tablet_schema = std::make_shared<TabletSchema>();
+    }
+
+    TabletIndex create_test_index(int64_t index_id, IndexType type,
+                                  const std::vector<int32_t>& col_uids,
+                                  const std::string& suffix = "") {
+        TabletIndex index;
+        index._index_id = index_id;
+        index._index_type = type;
+        index._col_unique_ids = col_uids;
+        index.set_escaped_escaped_index_suffix_path(suffix);
+        return index;
+    }
+
+    std::shared_ptr<TabletSchema> _tablet_schema;
+};
+
+TEST_F(TabletSchemaIndexTest, TestAddInvertedIndex) {
+    // Add inverted index with suffix
+    TabletIndex index = create_test_index(1, IndexType::INVERTED, {100}, 
"suffix1");
+    _tablet_schema->append_index(std::move(index));
+
+    // Verify index mapping
+    auto* found_index = _tablet_schema->inverted_index(100, "suffix1");
+    ASSERT_NE(found_index, nullptr);
+    EXPECT_EQ(found_index->index_id(), 1);
+    EXPECT_EQ(found_index->get_index_suffix(), "suffix1");
+}
+
+TEST_F(TabletSchemaIndexTest, TestRemoveIndex) {
+    // Add multiple indexes
+    _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, 
{100}, "suffix1"));
+    _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, 
{200}, "suffix2"));
+
+    // Remove index 1
+    _tablet_schema->remove_index(1);
+
+    // Verify index 1 removed
+    EXPECT_EQ(_tablet_schema->inverted_index(100, "suffix1"), nullptr);
+
+    // Verify index 2 still exists
+    auto* found_index = _tablet_schema->inverted_index(200, "suffix2");
+    ASSERT_NE(found_index, nullptr);
+    EXPECT_EQ(found_index->index_id(), 2);
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndex) {
+    // Add initial index
+    _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, 
{100}, "old_suffix"));
+    ASSERT_NE(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);
+
+    // Update index with new suffix
+    _tablet_schema->remove_index(1);
+    _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, 
{100}, "new_suffix"));
+
+    // Verify update
+    EXPECT_EQ(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);
+    auto* found_index = _tablet_schema->inverted_index(100, "new_suffix");
+    ASSERT_NE(found_index, nullptr);
+    EXPECT_EQ(found_index->get_index_suffix(), "new%5Fsuffix");
+}
+
+TEST_F(TabletSchemaIndexTest, TestMultipleColumnsIndex) {
+    // Add index with multiple columns
+    TabletIndex index = create_test_index(1, IndexType::INVERTED, {100, 200}, 
"multi_col");
+    _tablet_schema->append_index(std::move(index));
+
+    // Verify both columns mapped
+    auto* index1 = _tablet_schema->inverted_index(100, "multi_col");
+    auto* index2 = _tablet_schema->inverted_index(200, "multi_col");
+    ASSERT_NE(index1, nullptr);
+    ASSERT_EQ(index1, index2); // Should point to same index
+}
+
+TEST_F(TabletSchemaIndexTest, TestDuplicateIndexKey) {
+    // Add two indexes with same (type,col,suffix)
+    _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, 
{100}, "suffix"));
+    _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, 
{100}, "suffix"));
+
+    // The last added should override
+    auto* found_index = _tablet_schema->inverted_index(100, "suffix");
+    ASSERT_NE(found_index, nullptr);
+    EXPECT_EQ(found_index->index_id(), 1);
+}
+
+TEST_F(TabletSchemaIndexTest, TestClearIndexes) {
+    _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, 
{100}));
+    _tablet_schema->clear_index();
+
+    EXPECT_EQ(_tablet_schema->inverted_index(100, ""), nullptr);
+    EXPECT_TRUE(_tablet_schema->inverted_indexes().empty());
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexMethod) {
+    TabletColumn col;
+    col.set_parent_unique_id(100);
+    col.set_path_info(vectorized::PathInData("v2"));
+    _tablet_schema->append_column(col);
+
+    TabletIndex old_index = create_test_index(1, IndexType::INVERTED, {100}, 
"v2");
+    _tablet_schema->append_index(std::move(old_index));
+
+    TabletIndex new_index = create_test_index(1, IndexType::INVERTED, {100}, 
"v2");
+    new_index._properties["new_prop"] = "value";
+
+    _tablet_schema->update_index(col, IndexType::INVERTED, 
std::move(new_index));
+
+    const TabletIndex* updated_index = _tablet_schema->inverted_index(100, 
"v2");
+    ASSERT_NE(updated_index, nullptr);
+    EXPECT_EQ(updated_index->index_id(), 1);
+    EXPECT_EQ(updated_index->properties().at("new_prop"), "value");
+
+    auto key = std::make_tuple(IndexType::INVERTED, 100, "v2");
+    EXPECT_NE(_tablet_schema->_col_id_suffix_to_index.find(key),
+              _tablet_schema->_col_id_suffix_to_index.end());
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexAddNewWhenNotExist) {
+    // Not exist, return nullptr
+    TabletColumn col;
+    col.set_unique_id(200);
+
+    TabletIndex new_index = create_test_index(2, IndexType::INVERTED, {200}, 
"v3");
+    _tablet_schema->update_index(col, IndexType::INVERTED, 
std::move(new_index));
+
+    const TabletIndex* index = _tablet_schema->inverted_index(200, "v3");
+    ASSERT_EQ(index, nullptr);
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexWithMultipleColumns) {
+    TabletColumn col1, col2;
+    col1.set_unique_id(300);
+    col2.set_unique_id(400);
+    _tablet_schema->append_column(col1);
+    _tablet_schema->append_column(col2);
+
+    TabletIndex old_multi_index = create_test_index(3, IndexType::INVERTED, 
{300, 400}, "multi");
+    _tablet_schema->append_index(std::move(old_multi_index));
+
+    TabletIndex new_multi_index = create_test_index(3, IndexType::NGRAM_BF, 
{300, 400});
+    _tablet_schema->append_index(std::move(new_multi_index));
+
+    ASSERT_NE(_tablet_schema->inverted_index(300, "multi"), nullptr);
+    EXPECT_NE(_tablet_schema->get_ngram_bf_index(400), nullptr);
+}
+
+} // namespace doris
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to