This is an automated email from the ASF dual-hosted git repository. zhaoc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new cc31bf9 [rowset id] A little improvement of rowset id generator (#3203) cc31bf9 is described below commit cc31bf9cf96ec0bef2ce55553b247df7bbe598e3 Author: Yingchun Lai <405403...@qq.com> AuthorDate: Thu Mar 26 20:24:26 2020 +0800 [rowset id] A little improvement of rowset id generator (#3203) The main optimization points: 1. Use std::unordered_set instead of std::set, and use RowsetId.hi as RowsetId's hash value. 2. Minimize the scope of SpinLock in UniqueRowsetIdGenerator. Profile comparation: * Run UniqueRowsetIdGeneratorTest.GenerateIdBenchmark 10 times old version | new version 6s962ms | 3s647ms 6s139ms | 3s393ms 6s234ms | 3s686ms 6s060ms | 3s447ms 5s966ms | 4s127ms 5s786ms | 3s994ms 5s778ms | 4s072ms 6s193ms | 4s082ms 6s159ms | 3s560ms 5s591ms | 3s654ms --- be/src/olap/olap_common.h | 11 ++- be/src/olap/rowset/rowset_id_generator.h | 2 +- be/src/olap/rowset/unique_rowset_id_generator.cpp | 16 +-- be/src/olap/rowset/unique_rowset_id_generator.h | 14 +-- .../rowset/unique_rowset_id_generator_test.cpp | 109 ++++++++++++++------- run-ut.sh | 1 + 6 files changed, 104 insertions(+), 49 deletions(-) diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index d051220..8dd7723 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -273,7 +273,7 @@ typedef std::set<uint32_t> UniqueIdSet; typedef std::map<ColumnId, ColumnId> UniqueIdToColumnIdMap; // 8 bit rowset id version -// 56 bit, inc number from 0 +// 56 bit, inc number from 1 // 128 bit backend uid, it is a uuid bit, id version struct RowsetId { int8_t version = 0; @@ -305,7 +305,7 @@ struct RowsetId { void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) { version = id_version; - if (high >= MAX_ROWSET_ID) { + if (UNLIKELY(high >= MAX_ROWSET_ID)) { LOG(FATAL) << "inc rowsetid is too large:" << high; } hi = (id_version << 56) + (high & LOW_56_BITS); @@ -350,6 +350,13 @@ struct RowsetId { } }; +struct RowsetIdHash { + size_t operator()(const RowsetId& rowset_id) const { + // hi is an increasing number on a BE instance, we can use it as the hash value simply. + return rowset_id.hi; + } +}; + } // namespace doris #endif // DORIS_BE_SRC_OLAP_OLAP_COMMON_H diff --git a/be/src/olap/rowset/rowset_id_generator.h b/be/src/olap/rowset/rowset_id_generator.h index ce845cb..ec830e2 100644 --- a/be/src/olap/rowset/rowset_id_generator.h +++ b/be/src/olap/rowset/rowset_id_generator.h @@ -39,7 +39,7 @@ public: // for example, during gc logic, gc thread finds a file // and it could not find it under rowset list. but it maybe in use // during load procedure. Gc thread will check it using this method. - virtual bool id_in_use(const RowsetId& rowset_id) = 0; + virtual bool id_in_use(const RowsetId& rowset_id) const = 0; // remove the rowsetid from useful rowsetid list. virtual void release_id(const RowsetId& rowset_id) = 0; diff --git a/be/src/olap/rowset/unique_rowset_id_generator.cpp b/be/src/olap/rowset/unique_rowset_id_generator.cpp index 875abea..d03b43c 100644 --- a/be/src/olap/rowset/unique_rowset_id_generator.cpp +++ b/be/src/olap/rowset/unique_rowset_id_generator.cpp @@ -22,26 +22,28 @@ namespace doris { UniqueRowsetIdGenerator::UniqueRowsetIdGenerator(const UniqueId& backend_uid) : - _backend_uid(backend_uid), _inc_id(1) { + _backend_uid(backend_uid), _inc_id(0) { } // generate a unique rowset id and save it in a set to check whether it is valid in the future RowsetId UniqueRowsetIdGenerator::next_id() { - std::lock_guard<SpinLock> l(_lock); RowsetId rowset_id; rowset_id.init(_version, ++_inc_id, _backend_uid.hi, _backend_uid.lo); - _valid_rowset_ids.insert(rowset_id); + { + std::lock_guard<SpinLock> l(_lock); + _valid_rowset_ids.insert(rowset_id); + } return rowset_id; } -bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) { - std::lock_guard<SpinLock> l(_lock); - // if rowset_id == 1, then it is an old version rowsetid, not gc it +bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) const { + // if rowset_id == 1, then it is an old version rowsetid, not gc it // because old version rowset id is not generated by this code, so that not delete them if (rowset_id.version < _version) { return true; } - return _valid_rowset_ids.find(rowset_id) != _valid_rowset_ids.end(); + std::lock_guard<SpinLock> l(_lock); + return _valid_rowset_ids.count(rowset_id) == 1; } void UniqueRowsetIdGenerator::release_id(const RowsetId& rowset_id) { diff --git a/be/src/olap/rowset/unique_rowset_id_generator.h b/be/src/olap/rowset/unique_rowset_id_generator.h index a0e9861..2715438 100644 --- a/be/src/olap/rowset/unique_rowset_id_generator.h +++ b/be/src/olap/rowset/unique_rowset_id_generator.h @@ -30,16 +30,18 @@ public: RowsetId next_id() override; - bool id_in_use(const RowsetId& rowset_id) override; + bool id_in_use(const RowsetId& rowset_id) const override; void release_id(const RowsetId& rowset_id) override; private: - SpinLock _lock; - UniqueId _backend_uid; + mutable SpinLock _lock; + const UniqueId _backend_uid; const int64_t _version = 2; // modify it when create new version id generator - int64_t _inc_id = 0; - std::set<RowsetId> _valid_rowset_ids; -}; // FeBasedRowsetIdGenerator + std::atomic<int64_t> _inc_id; + std::unordered_set<RowsetId, RowsetIdHash> _valid_rowset_ids; + + DISALLOW_COPY_AND_ASSIGN(UniqueRowsetIdGenerator); +}; // UniqueRowsetIdGenerator } // namespace doris diff --git a/be/test/olap/rowset/unique_rowset_id_generator_test.cpp b/be/test/olap/rowset/unique_rowset_id_generator_test.cpp index 5fc021e..b8da255 100644 --- a/be/test/olap/rowset/unique_rowset_id_generator_test.cpp +++ b/be/test/olap/rowset/unique_rowset_id_generator_test.cpp @@ -20,6 +20,10 @@ #include <gtest/gtest.h> #include <iostream> +#include "util/runtime_profile.h" +#include "util/threadpool.h" +#include "util/pretty_printer.h" + namespace doris { class UniqueRowsetIdGeneratorTest : public testing::Test { public: @@ -29,33 +33,38 @@ public: }; TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) { - int64_t max_id = 1; - max_id = max_id << 56; { + int64_t hi = 1; // version + hi <<= 56; RowsetId rowset_id; rowset_id.init(123); - ASSERT_TRUE(rowset_id.version == 1); - ASSERT_TRUE(rowset_id.hi == (123 + max_id)); - ASSERT_TRUE(rowset_id.mi == 0); - ASSERT_TRUE(rowset_id.lo == 0); - ASSERT_STREQ("123", rowset_id.to_string().c_str()); + ASSERT_EQ(rowset_id.version, 1); + ASSERT_EQ(rowset_id.hi, 123 + hi); + ASSERT_EQ(rowset_id.mi, 0); + ASSERT_EQ(rowset_id.lo, 0); + ASSERT_EQ(std::string("123"), rowset_id.to_string()); } { + int64_t hi = 1; // version + hi <<= 56; RowsetId rowset_id; rowset_id.init("123"); - ASSERT_TRUE(rowset_id.version == 1); - ASSERT_TRUE(rowset_id.hi == (123 + max_id)); - ASSERT_TRUE(rowset_id.mi == 0); - ASSERT_TRUE(rowset_id.lo == 0); - ASSERT_STREQ("123", rowset_id.to_string().c_str()); + ASSERT_EQ(rowset_id.version, 1); + ASSERT_EQ(rowset_id.hi, 123 + hi); + ASSERT_EQ(rowset_id.mi, 0); + ASSERT_EQ(rowset_id.lo, 0); + ASSERT_EQ(std::string("123"), rowset_id.to_string()); } { + int64_t hi = 2; // version + hi <<= 56; + const std::string rowset_id_v2("0200000000000003c04f58d989cab2f2efd45faa20449189"); RowsetId rowset_id; - rowset_id.init("0200000000000003c04f58d989cab2f2efd45faa20449189"); - ASSERT_TRUE(rowset_id.version == 2); - ASSERT_TRUE(rowset_id.hi == (3 + max_id)); - ASSERT_STREQ("0200000000000003c04f58d989cab2f2efd45faa20449189", rowset_id.to_string().c_str()); + rowset_id.init(rowset_id_v2); + ASSERT_EQ(rowset_id.version, 2); + ASSERT_EQ(rowset_id.hi, 3 + hi); + ASSERT_EQ(std::string(rowset_id_v2), rowset_id.to_string()); } } @@ -63,39 +72,73 @@ TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) { TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdTest) { UniqueId backend_uid = UniqueId::gen_uid(); UniqueId backend_uid2 = UniqueId::gen_uid(); - ASSERT_TRUE(backend_uid != backend_uid2); + ASSERT_NE(backend_uid, backend_uid2); UniqueRowsetIdGenerator id_generator(backend_uid); UniqueRowsetIdGenerator id_generator2(backend_uid2); { - RowsetId rowset_id1 = id_generator.next_id(); + RowsetId rowset_id1 = id_generator.next_id(); // hi == 1 RowsetId rowset_id2 = id_generator2.next_id(); - ASSERT_TRUE(rowset_id1.hi != rowset_id2.hi); + ASSERT_EQ(rowset_id1.hi, rowset_id2.hi); } { - int64_t max_id = 2; - max_id = max_id << 56; - RowsetId rowset_id = id_generator.next_id(); - ASSERT_TRUE(rowset_id.hi == (1 + max_id)); - ASSERT_TRUE(rowset_id.version == 2); - ASSERT_TRUE(backend_uid.lo == rowset_id.lo); - ASSERT_TRUE(backend_uid.hi == rowset_id.mi); - ASSERT_TRUE(rowset_id.hi != 0); + int64_t hi = 2; // version + hi <<= 56; + RowsetId rowset_id = id_generator.next_id(); // hi == 2 + ASSERT_EQ(rowset_id.hi, hi + 2); + ASSERT_EQ(rowset_id.version, 2); + ASSERT_EQ(backend_uid.lo, rowset_id.lo); + ASSERT_EQ(backend_uid.hi, rowset_id.mi); + ASSERT_NE(rowset_id.hi, 0); bool in_use = id_generator.id_in_use(rowset_id); - ASSERT_TRUE(in_use == true); + ASSERT_TRUE(in_use); id_generator.release_id(rowset_id); in_use = id_generator.id_in_use(rowset_id); - ASSERT_TRUE(in_use == false); + ASSERT_FALSE(in_use); int64_t high = rowset_id.hi + 1; - rowset_id = id_generator.next_id(); - ASSERT_TRUE(rowset_id.hi == high); + rowset_id = id_generator.next_id(); // hi == 3 + ASSERT_EQ(rowset_id.hi, high); in_use = id_generator.id_in_use(rowset_id); - ASSERT_TRUE(in_use == true); + ASSERT_TRUE(in_use); std::string rowset_mid_str = rowset_id.to_string().substr(16,16); std::string backend_mid_str = backend_uid.to_string().substr(0, 16); - ASSERT_STREQ(rowset_mid_str.c_str(), backend_mid_str.c_str()); + ASSERT_EQ(rowset_mid_str, backend_mid_str); + } +} + +TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdBenchmark) { + const int kNumThreads = 8; + const int kIdPerThread = 1000000; + + UniqueId backend_uid = UniqueId::gen_uid(); + UniqueRowsetIdGenerator id_generator(backend_uid); + std::unique_ptr<ThreadPool> pool; + Status s = ThreadPoolBuilder("GenerateIdBenchmark") + .set_min_threads(kNumThreads) + .set_max_threads(kNumThreads) + .build(&pool); + ASSERT_TRUE(s.ok()) << s.to_string(); + + int64_t cost_ns = 0; + { + SCOPED_RAW_TIMER(&cost_ns); + for (int i = 0; i < kNumThreads; i++) { + ASSERT_TRUE(pool->submit_func([&id_generator]() { + for (int i = 0; i < kIdPerThread; ++i) { + id_generator.next_id(); + } + }).ok()); + } + pool->wait(); } + + int64_t hi = 2; // version + hi <<= 56; + RowsetId last_id = id_generator.next_id(); + ASSERT_EQ(last_id.hi, hi + kNumThreads * kIdPerThread + 1); + std::cout << "Generate " << kNumThreads * kIdPerThread << " rowset ids cost " + << PrettyPrinter::print(cost_ns, TUnit::TIME_NS) << std::endl; } } diff --git a/run-ut.sh b/run-ut.sh index d10cccb..0b4b531 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -277,6 +277,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/alpha_rowset_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/beta_rowset_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_converter_test +${DORIS_TEST_BINARY_DIR}/olap/rowset/unique_rowset_id_generator_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org