This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6cc2d4a3e5c [Improve](TabletSchema) optimize finding TabletIndex performance in TabletSchema (#48699) 6cc2d4a3e5c is described below commit 6cc2d4a3e5c08520584ddf11e10defede4b82736 Author: lihangyu <lihan...@selectdb.com> AuthorDate: Thu Mar 27 21:32:29 2025 +0800 [Improve](TabletSchema) optimize finding TabletIndex performance in TabletSchema (#48699) Add an `unordered_map` to accelerate TabletIndex search speed  --- be/src/olap/tablet_schema.cpp | 91 +++++++++------- be/src/olap/tablet_schema.h | 17 +++ be/test/olap/tablet_schema_index_test.cpp | 174 ++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+), 38 deletions(-) diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index a2c4f29ce0d..2ffc4a2d1a3 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -905,6 +905,10 @@ void TabletColumn::append_sparse_column(TabletColumn column) { } void TabletSchema::append_index(TabletIndex&& index) { + for (int32_t id : index.col_unique_ids()) { + _col_id_suffix_to_index.emplace( + std::make_tuple(index.index_type(), id, index.get_index_suffix()), _indexes.size()); + } _indexes.push_back(std::make_shared<TabletIndex>(index)); } @@ -912,15 +916,14 @@ void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_ TabletIndex&& index) { int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); const std::string& suffix_path = escape_for_path_name(col.suffix_path()); - for (auto& _indexe : _indexes) { - for (int32_t id : _indexe->col_unique_ids()) { - if (_indexe->index_type() == index_type && id == col_unique_id && - _indexe->get_index_suffix() == suffix_path) { - _indexe = std::make_shared<TabletIndex>(std::move(index)); - break; - } - } + IndexKey key(index_type, col_unique_id, suffix_path); + auto iter = _col_id_suffix_to_index.find(key); + if (iter != _col_id_suffix_to_index.end()) { + _indexes[iter->second] = std::make_shared<TabletIndex>(std::move(index)); + return; } + LOG(WARNING) << " failed to update_index: " << index_type << " " << col_unique_id << " " + << suffix_path; } void TabletSchema::replace_column(size_t pos, TabletColumn new_col) { @@ -938,17 +941,25 @@ void TabletSchema::clear_index_cache_handlers() { void TabletSchema::clear_index() { clear_index_cache_handlers(); _indexes.clear(); + _col_id_suffix_to_index.clear(); } void TabletSchema::remove_index(int64_t index_id) { std::vector<TabletIndexPtr> indexes; + std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index; for (auto index : _indexes) { if (index->index_id() == index_id) { continue; } + for (int32_t col_uid : index->col_unique_ids()) { + col_id_suffix_to_index.emplace( + std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()), + indexes.size()); + } indexes.emplace_back(std::move(index)); } _indexes = std::move(indexes); + _col_id_suffix_to_index = std::move(col_id_suffix_to_index); } void TabletSchema::clear_columns() { @@ -979,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _num_null_columns = 0; _cols.clear(); _indexes.clear(); + _col_id_suffix_to_index.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); _cluster_key_uids.clear(); @@ -1032,6 +1044,11 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac index = std::make_shared<TabletIndex>(); index->init_from_pb(index_pb); } + for (int32_t col_uid : index->col_unique_ids()) { + _col_id_suffix_to_index.emplace( + std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()), + _indexes.size()); + } _indexes.emplace_back(std::move(index)); } _num_short_key_columns = schema.num_short_key_columns(); @@ -1150,6 +1167,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version bool has_bf_columns = false; _cols.clear(); _indexes.clear(); + _col_id_suffix_to_index.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); _delete_sign_idx = -1; @@ -1189,7 +1207,12 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _num_columns++; } - for (auto& i : index->indexes) { + for (const auto& i : index->indexes) { + for (int32_t col_uid : i->col_unique_ids()) { + _col_id_suffix_to_index.emplace( + std::make_tuple(i->index_type(), col_uid, i->get_index_suffix()), + _indexes.size()); + } _indexes.emplace_back(std::make_shared<TabletIndex>(*i)); } @@ -1371,6 +1394,15 @@ void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTabl indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index))); } _indexes = std::move(indexes); + std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index; + for (size_t i = 0; i < _indexes.size(); i++) { + for (int32_t col_uid : _indexes[i]->col_unique_ids()) { + col_id_suffix_to_index.emplace(std::make_tuple(_indexes[i]->index_type(), col_uid, + _indexes[i]->get_index_suffix()), + i); + } + } + _col_id_suffix_to_index = std::move(col_id_suffix_to_index); } bool TabletSchema::exist_column(const std::string& field_name) const { @@ -1426,14 +1458,10 @@ bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id, const std::string& suffix_path) const { const std::string escaped_suffix = escape_for_path_name(suffix_path); - for (const auto& _index : _indexes) { - if (_index->index_type() == IndexType::INVERTED) { - for (int32_t id : _index->col_unique_ids()) { - if (id == col_unique_id && _index->get_index_suffix() == escaped_suffix) { - return _index.get(); - } - } - } + auto it = _col_id_suffix_to_index.find( + std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix)); + if (it != _col_id_suffix_to_index.end()) { + return _indexes[it->second].get(); } return nullptr; } @@ -1450,30 +1478,17 @@ const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const { } bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { - // TODO use more efficient impl - for (const auto& _index : _indexes) { - if (_index->index_type() == IndexType::NGRAM_BF) { - for (int32_t id : _index->col_unique_ids()) { - if (id == col_unique_id) { - return true; - } - } - } - } - - return false; + IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, ""); + auto it = _col_id_suffix_to_index.find(index_key); + return it != _col_id_suffix_to_index.end(); } const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const { - // TODO use more efficient impl - for (const auto& _index : _indexes) { - if (_index->index_type() == IndexType::NGRAM_BF) { - for (int32_t id : _index->col_unique_ids()) { - if (id == col_unique_id) { - return _index.get(); - } - } - } + // Get the ngram bf index for the given column unique id + IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, ""); + auto it = _col_id_suffix_to_index.find(index_key); + if (it != _col_id_suffix_to_index.end()) { + return _indexes[it->second].get(); } return nullptr; } diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index cdc91981d24..8d0f3f111de 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -560,6 +560,23 @@ private: std::unordered_map<int32_t, int32_t> _field_id_to_index; std::unordered_map<vectorized::PathInDataRef, int32_t, vectorized::PathInDataRef::Hash> _field_path_to_index; + + // index_type/col_unique_id/suffix -> idx in _indexes + using IndexKey = std::tuple<IndexType, int32_t, std::string>; + struct IndexKeyHash { + size_t operator()(const IndexKey& t) const { + std::size_t seed = 0; + seed = doris::HashUtil::hash((const char*)&std::get<0>(t), sizeof(std::get<0>(t)), + seed); + seed = doris::HashUtil::hash((const char*)&std::get<1>(t), sizeof(std::get<1>(t)), + seed); + seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(), std::get<2>(t).size(), + seed); + return seed; + } + }; + std::unordered_map<IndexKey, int32_t, IndexKeyHash> _col_id_suffix_to_index; + size_t _num_columns = 0; size_t _num_variant_columns = 0; size_t _num_key_columns = 0; diff --git a/be/test/olap/tablet_schema_index_test.cpp b/be/test/olap/tablet_schema_index_test.cpp new file mode 100644 index 00000000000..b35bbdb2a47 --- /dev/null +++ b/be/test/olap/tablet_schema_index_test.cpp @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include "olap/tablet_schema.h" + +namespace doris { + +class TabletSchemaIndexTest : public testing::Test { +protected: + void SetUp() override { + // Setup common test data + _tablet_schema = std::make_shared<TabletSchema>(); + } + + TabletIndex create_test_index(int64_t index_id, IndexType type, + const std::vector<int32_t>& col_uids, + const std::string& suffix = "") { + TabletIndex index; + index._index_id = index_id; + index._index_type = type; + index._col_unique_ids = col_uids; + index.set_escaped_escaped_index_suffix_path(suffix); + return index; + } + + std::shared_ptr<TabletSchema> _tablet_schema; +}; + +TEST_F(TabletSchemaIndexTest, TestAddInvertedIndex) { + // Add inverted index with suffix + TabletIndex index = create_test_index(1, IndexType::INVERTED, {100}, "suffix1"); + _tablet_schema->append_index(std::move(index)); + + // Verify index mapping + auto* found_index = _tablet_schema->inverted_index(100, "suffix1"); + ASSERT_NE(found_index, nullptr); + EXPECT_EQ(found_index->index_id(), 1); + EXPECT_EQ(found_index->get_index_suffix(), "suffix1"); +} + +TEST_F(TabletSchemaIndexTest, TestRemoveIndex) { + // Add multiple indexes + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix1")); + _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {200}, "suffix2")); + + // Remove index 1 + _tablet_schema->remove_index(1); + + // Verify index 1 removed + EXPECT_EQ(_tablet_schema->inverted_index(100, "suffix1"), nullptr); + + // Verify index 2 still exists + auto* found_index = _tablet_schema->inverted_index(200, "suffix2"); + ASSERT_NE(found_index, nullptr); + EXPECT_EQ(found_index->index_id(), 2); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndex) { + // Add initial index + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "old_suffix")); + ASSERT_NE(_tablet_schema->inverted_index(100, "old_suffix"), nullptr); + + // Update index with new suffix + _tablet_schema->remove_index(1); + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "new_suffix")); + + // Verify update + EXPECT_EQ(_tablet_schema->inverted_index(100, "old_suffix"), nullptr); + auto* found_index = _tablet_schema->inverted_index(100, "new_suffix"); + ASSERT_NE(found_index, nullptr); + EXPECT_EQ(found_index->get_index_suffix(), "new%5Fsuffix"); +} + +TEST_F(TabletSchemaIndexTest, TestMultipleColumnsIndex) { + // Add index with multiple columns + TabletIndex index = create_test_index(1, IndexType::INVERTED, {100, 200}, "multi_col"); + _tablet_schema->append_index(std::move(index)); + + // Verify both columns mapped + auto* index1 = _tablet_schema->inverted_index(100, "multi_col"); + auto* index2 = _tablet_schema->inverted_index(200, "multi_col"); + ASSERT_NE(index1, nullptr); + ASSERT_EQ(index1, index2); // Should point to same index +} + +TEST_F(TabletSchemaIndexTest, TestDuplicateIndexKey) { + // Add two indexes with same (type,col,suffix) + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix")); + _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {100}, "suffix")); + + // The last added should override + auto* found_index = _tablet_schema->inverted_index(100, "suffix"); + ASSERT_NE(found_index, nullptr); + EXPECT_EQ(found_index->index_id(), 1); +} + +TEST_F(TabletSchemaIndexTest, TestClearIndexes) { + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100})); + _tablet_schema->clear_index(); + + EXPECT_EQ(_tablet_schema->inverted_index(100, ""), nullptr); + EXPECT_TRUE(_tablet_schema->inverted_indexes().empty()); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexMethod) { + TabletColumn col; + col.set_parent_unique_id(100); + col.set_path_info(vectorized::PathInData("v2")); + _tablet_schema->append_column(col); + + TabletIndex old_index = create_test_index(1, IndexType::INVERTED, {100}, "v2"); + _tablet_schema->append_index(std::move(old_index)); + + TabletIndex new_index = create_test_index(1, IndexType::INVERTED, {100}, "v2"); + new_index._properties["new_prop"] = "value"; + + _tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_index)); + + const TabletIndex* updated_index = _tablet_schema->inverted_index(100, "v2"); + ASSERT_NE(updated_index, nullptr); + EXPECT_EQ(updated_index->index_id(), 1); + EXPECT_EQ(updated_index->properties().at("new_prop"), "value"); + + auto key = std::make_tuple(IndexType::INVERTED, 100, "v2"); + EXPECT_NE(_tablet_schema->_col_id_suffix_to_index.find(key), + _tablet_schema->_col_id_suffix_to_index.end()); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexAddNewWhenNotExist) { + // Not exist, return nullptr + TabletColumn col; + col.set_unique_id(200); + + TabletIndex new_index = create_test_index(2, IndexType::INVERTED, {200}, "v3"); + _tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_index)); + + const TabletIndex* index = _tablet_schema->inverted_index(200, "v3"); + ASSERT_EQ(index, nullptr); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexWithMultipleColumns) { + TabletColumn col1, col2; + col1.set_unique_id(300); + col2.set_unique_id(400); + _tablet_schema->append_column(col1); + _tablet_schema->append_column(col2); + + TabletIndex old_multi_index = create_test_index(3, IndexType::INVERTED, {300, 400}, "multi"); + _tablet_schema->append_index(std::move(old_multi_index)); + + TabletIndex new_multi_index = create_test_index(3, IndexType::NGRAM_BF, {300, 400}); + _tablet_schema->append_index(std::move(new_multi_index)); + + ASSERT_NE(_tablet_schema->inverted_index(300, "multi"), nullptr); + EXPECT_NE(_tablet_schema->get_ngram_bf_index(400), nullptr); +} + +} // namespace doris \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org