This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cc31bf9  [rowset id] A little improvement of rowset id generator 
(#3203)
cc31bf9 is described below

commit cc31bf9cf96ec0bef2ce55553b247df7bbe598e3
Author: Yingchun Lai <405403...@qq.com>
AuthorDate: Thu Mar 26 20:24:26 2020 +0800

    [rowset id] A little improvement of rowset id generator (#3203)
    
    The main optimization points:
    1. Use std::unordered_set instead of std::set, and use RowsetId.hi as 
RowsetId's hash value.
    2. Minimize the scope of SpinLock in UniqueRowsetIdGenerator.
    
    Profile comparation:
    * Run UniqueRowsetIdGeneratorTest.GenerateIdBenchmark 10 times
    old version |  new version
    6s962ms     |  3s647ms
    6s139ms     |  3s393ms
    6s234ms     |  3s686ms
    6s060ms     |  3s447ms
    5s966ms     |  4s127ms
    5s786ms     |  3s994ms
    5s778ms     |  4s072ms
    6s193ms     |  4s082ms
    6s159ms     |  3s560ms
    5s591ms     |  3s654ms
---
 be/src/olap/olap_common.h                          |  11 ++-
 be/src/olap/rowset/rowset_id_generator.h           |   2 +-
 be/src/olap/rowset/unique_rowset_id_generator.cpp  |  16 +--
 be/src/olap/rowset/unique_rowset_id_generator.h    |  14 +--
 .../rowset/unique_rowset_id_generator_test.cpp     | 109 ++++++++++++++-------
 run-ut.sh                                          |   1 +
 6 files changed, 104 insertions(+), 49 deletions(-)

diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index d051220..8dd7723 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -273,7 +273,7 @@ typedef std::set<uint32_t> UniqueIdSet;
 typedef std::map<ColumnId, ColumnId> UniqueIdToColumnIdMap;
 
 // 8 bit rowset id version
-// 56 bit, inc number from 0
+// 56 bit, inc number from 1
 // 128 bit backend uid, it is a uuid bit, id version
 struct RowsetId {
     int8_t version = 0;
@@ -305,7 +305,7 @@ struct RowsetId {
 
     void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
         version = id_version;
-        if (high >= MAX_ROWSET_ID) {
+        if (UNLIKELY(high >= MAX_ROWSET_ID)) {
             LOG(FATAL) << "inc rowsetid is too large:" << high;
         }
         hi = (id_version << 56) + (high & LOW_56_BITS);
@@ -350,6 +350,13 @@ struct RowsetId {
     }
 };
 
+struct RowsetIdHash {
+    size_t operator()(const RowsetId& rowset_id) const {
+        // hi is an increasing number on a BE instance, we can use it as the 
hash value simply.
+        return rowset_id.hi;
+    }
+};
+
 }  // namespace doris
 
 #endif // DORIS_BE_SRC_OLAP_OLAP_COMMON_H
diff --git a/be/src/olap/rowset/rowset_id_generator.h 
b/be/src/olap/rowset/rowset_id_generator.h
index ce845cb..ec830e2 100644
--- a/be/src/olap/rowset/rowset_id_generator.h
+++ b/be/src/olap/rowset/rowset_id_generator.h
@@ -39,7 +39,7 @@ public:
     // for example, during gc logic, gc thread finds a file
     // and it could not find it under rowset list. but it maybe in use
     // during load procedure. Gc thread will check it using this method.
-    virtual bool id_in_use(const RowsetId& rowset_id) = 0;
+    virtual bool id_in_use(const RowsetId& rowset_id) const = 0;
 
     // remove the rowsetid from useful rowsetid list.
     virtual void release_id(const RowsetId& rowset_id) = 0;
diff --git a/be/src/olap/rowset/unique_rowset_id_generator.cpp 
b/be/src/olap/rowset/unique_rowset_id_generator.cpp
index 875abea..d03b43c 100644
--- a/be/src/olap/rowset/unique_rowset_id_generator.cpp
+++ b/be/src/olap/rowset/unique_rowset_id_generator.cpp
@@ -22,26 +22,28 @@
 namespace doris {
 
 UniqueRowsetIdGenerator::UniqueRowsetIdGenerator(const UniqueId& backend_uid) :
-    _backend_uid(backend_uid), _inc_id(1) {
+    _backend_uid(backend_uid), _inc_id(0) {
 }
 
 // generate a unique rowset id and save it in a set to check whether it is 
valid in the future
 RowsetId UniqueRowsetIdGenerator::next_id() {
-    std::lock_guard<SpinLock> l(_lock);
     RowsetId rowset_id;
     rowset_id.init(_version, ++_inc_id, _backend_uid.hi, _backend_uid.lo);
-    _valid_rowset_ids.insert(rowset_id);
+    {
+        std::lock_guard<SpinLock> l(_lock);
+        _valid_rowset_ids.insert(rowset_id);
+    }
     return rowset_id;
 }
 
-bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) {
-    std::lock_guard<SpinLock> l(_lock);
-    // if rowset_id == 1, then it is an old version rowsetid, not gc it 
+bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) const {
+    // if rowset_id == 1, then it is an old version rowsetid, not gc it
     // because old version rowset id is not generated by this code, so that 
not delete them
     if (rowset_id.version < _version) {
         return true;
     }
-    return _valid_rowset_ids.find(rowset_id) != _valid_rowset_ids.end();
+    std::lock_guard<SpinLock> l(_lock);
+    return _valid_rowset_ids.count(rowset_id) == 1;
 }
 
 void UniqueRowsetIdGenerator::release_id(const RowsetId& rowset_id) {
diff --git a/be/src/olap/rowset/unique_rowset_id_generator.h 
b/be/src/olap/rowset/unique_rowset_id_generator.h
index a0e9861..2715438 100644
--- a/be/src/olap/rowset/unique_rowset_id_generator.h
+++ b/be/src/olap/rowset/unique_rowset_id_generator.h
@@ -30,16 +30,18 @@ public:
 
     RowsetId next_id() override;
 
-    bool id_in_use(const RowsetId& rowset_id) override;
+    bool id_in_use(const RowsetId& rowset_id) const override;
 
     void release_id(const RowsetId& rowset_id) override;
 
 private:
-    SpinLock _lock;
-    UniqueId _backend_uid;
+    mutable SpinLock _lock;
+    const UniqueId _backend_uid;
     const int64_t _version = 2; // modify it when create new version id 
generator
-    int64_t _inc_id = 0;
-    std::set<RowsetId> _valid_rowset_ids; 
-}; // FeBasedRowsetIdGenerator
+    std::atomic<int64_t> _inc_id;
+    std::unordered_set<RowsetId, RowsetIdHash> _valid_rowset_ids;
+
+    DISALLOW_COPY_AND_ASSIGN(UniqueRowsetIdGenerator);
+}; // UniqueRowsetIdGenerator
 
 } // namespace doris
diff --git a/be/test/olap/rowset/unique_rowset_id_generator_test.cpp 
b/be/test/olap/rowset/unique_rowset_id_generator_test.cpp
index 5fc021e..b8da255 100644
--- a/be/test/olap/rowset/unique_rowset_id_generator_test.cpp
+++ b/be/test/olap/rowset/unique_rowset_id_generator_test.cpp
@@ -20,6 +20,10 @@
 #include <gtest/gtest.h>
 #include <iostream>
 
+#include "util/runtime_profile.h"
+#include "util/threadpool.h"
+#include "util/pretty_printer.h"
+
 namespace doris {
 class UniqueRowsetIdGeneratorTest : public testing::Test {
 public:
@@ -29,33 +33,38 @@ public:
 };
 
 TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
-    int64_t max_id = 1;
-    max_id = max_id << 56;
     {
+        int64_t hi = 1;  // version
+        hi <<= 56;
         RowsetId rowset_id;
         rowset_id.init(123);
-        ASSERT_TRUE(rowset_id.version == 1);
-        ASSERT_TRUE(rowset_id.hi == (123 + max_id));
-        ASSERT_TRUE(rowset_id.mi == 0);
-        ASSERT_TRUE(rowset_id.lo == 0);
-        ASSERT_STREQ("123", rowset_id.to_string().c_str());
+        ASSERT_EQ(rowset_id.version, 1);
+        ASSERT_EQ(rowset_id.hi, 123 + hi);
+        ASSERT_EQ(rowset_id.mi, 0);
+        ASSERT_EQ(rowset_id.lo, 0);
+        ASSERT_EQ(std::string("123"), rowset_id.to_string());
     }
     {
+        int64_t hi = 1;  // version
+        hi <<= 56;
         RowsetId rowset_id;
         rowset_id.init("123");
-        ASSERT_TRUE(rowset_id.version == 1);
-        ASSERT_TRUE(rowset_id.hi == (123 + max_id));
-        ASSERT_TRUE(rowset_id.mi == 0);
-        ASSERT_TRUE(rowset_id.lo == 0);
-        ASSERT_STREQ("123", rowset_id.to_string().c_str());
+        ASSERT_EQ(rowset_id.version, 1);
+        ASSERT_EQ(rowset_id.hi, 123 + hi);
+        ASSERT_EQ(rowset_id.mi, 0);
+        ASSERT_EQ(rowset_id.lo, 0);
+        ASSERT_EQ(std::string("123"), rowset_id.to_string());
     }
     
     {
+        int64_t hi = 2;  // version
+        hi <<= 56;
+        const std::string 
rowset_id_v2("0200000000000003c04f58d989cab2f2efd45faa20449189");
         RowsetId rowset_id;
-        rowset_id.init("0200000000000003c04f58d989cab2f2efd45faa20449189");
-        ASSERT_TRUE(rowset_id.version == 2);
-        ASSERT_TRUE(rowset_id.hi == (3 + max_id));
-        ASSERT_STREQ("0200000000000003c04f58d989cab2f2efd45faa20449189", 
rowset_id.to_string().c_str());
+        rowset_id.init(rowset_id_v2);
+        ASSERT_EQ(rowset_id.version, 2);
+        ASSERT_EQ(rowset_id.hi, 3 + hi);
+        ASSERT_EQ(std::string(rowset_id_v2), rowset_id.to_string());
     }
 }
 
@@ -63,39 +72,73 @@ TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
 TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdTest) {
     UniqueId backend_uid = UniqueId::gen_uid();
     UniqueId backend_uid2 = UniqueId::gen_uid();
-    ASSERT_TRUE(backend_uid != backend_uid2);
+    ASSERT_NE(backend_uid, backend_uid2);
     UniqueRowsetIdGenerator id_generator(backend_uid);
     UniqueRowsetIdGenerator id_generator2(backend_uid2);
     {
-        RowsetId rowset_id1 = id_generator.next_id();
+        RowsetId rowset_id1 = id_generator.next_id();  // hi == 1
         RowsetId rowset_id2 = id_generator2.next_id();
-        ASSERT_TRUE(rowset_id1.hi != rowset_id2.hi);
+        ASSERT_EQ(rowset_id1.hi, rowset_id2.hi);
     }
     {
-        int64_t max_id = 2;
-        max_id = max_id << 56;
-        RowsetId rowset_id = id_generator.next_id();
-        ASSERT_TRUE(rowset_id.hi == (1 + max_id));
-        ASSERT_TRUE(rowset_id.version == 2);
-        ASSERT_TRUE(backend_uid.lo == rowset_id.lo);
-        ASSERT_TRUE(backend_uid.hi == rowset_id.mi);
-        ASSERT_TRUE(rowset_id.hi != 0);
+        int64_t hi = 2;  // version
+        hi <<= 56;
+        RowsetId rowset_id = id_generator.next_id();  // hi == 2
+        ASSERT_EQ(rowset_id.hi, hi + 2);
+        ASSERT_EQ(rowset_id.version, 2);
+        ASSERT_EQ(backend_uid.lo, rowset_id.lo);
+        ASSERT_EQ(backend_uid.hi, rowset_id.mi);
+        ASSERT_NE(rowset_id.hi, 0);
         bool in_use = id_generator.id_in_use(rowset_id);
-        ASSERT_TRUE(in_use == true);
+        ASSERT_TRUE(in_use);
         id_generator.release_id(rowset_id);
         in_use = id_generator.id_in_use(rowset_id);
-        ASSERT_TRUE(in_use == false);
+        ASSERT_FALSE(in_use);
 
         int64_t high = rowset_id.hi + 1;
-        rowset_id = id_generator.next_id();
-        ASSERT_TRUE(rowset_id.hi == high);
+        rowset_id = id_generator.next_id();  // hi == 3
+        ASSERT_EQ(rowset_id.hi, high);
         in_use = id_generator.id_in_use(rowset_id);
-        ASSERT_TRUE(in_use == true);
+        ASSERT_TRUE(in_use);
 
         std::string rowset_mid_str = rowset_id.to_string().substr(16,16);
         std::string backend_mid_str = backend_uid.to_string().substr(0, 16);
-        ASSERT_STREQ(rowset_mid_str.c_str(), backend_mid_str.c_str());
+        ASSERT_EQ(rowset_mid_str, backend_mid_str);
+    }
+}
+
+TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdBenchmark) {
+    const int kNumThreads = 8;
+    const int kIdPerThread = 1000000;
+
+    UniqueId backend_uid = UniqueId::gen_uid();
+    UniqueRowsetIdGenerator id_generator(backend_uid);
+    std::unique_ptr<ThreadPool> pool;
+    Status s = ThreadPoolBuilder("GenerateIdBenchmark")
+            .set_min_threads(kNumThreads)
+            .set_max_threads(kNumThreads)
+            .build(&pool);
+    ASSERT_TRUE(s.ok()) << s.to_string();
+
+    int64_t cost_ns = 0;
+    {
+        SCOPED_RAW_TIMER(&cost_ns);
+        for (int i = 0; i < kNumThreads; i++) {
+            ASSERT_TRUE(pool->submit_func([&id_generator]() {
+                for (int i = 0; i < kIdPerThread; ++i) {
+                    id_generator.next_id();
+                }
+            }).ok());
+        }
+        pool->wait();
     }
+
+    int64_t hi = 2;  // version
+    hi <<= 56;
+    RowsetId last_id = id_generator.next_id();
+    ASSERT_EQ(last_id.hi, hi + kNumThreads * kIdPerThread + 1);
+    std::cout << "Generate " << kNumThreads * kIdPerThread << " rowset ids 
cost "
+              << PrettyPrinter::print(cost_ns, TUnit::TIME_NS) << std::endl;
 }
 
 }
diff --git a/run-ut.sh b/run-ut.sh
index d10cccb..0b4b531 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -277,6 +277,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/alpha_rowset_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/beta_rowset_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_converter_test
+${DORIS_TEST_BINARY_DIR}/olap/rowset/unique_rowset_id_generator_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to