This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8bd06aff7ed [Chore](MoW) remove unused code about rowset tree (#26282)
8bd06aff7ed is described below

commit 8bd06aff7ed07f4c9c0d71163122d7700c031228
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Thu Nov 2 20:25:27 2023 +0800

    [Chore](MoW) remove unused code about rowset tree (#26282)
---
 be/src/olap/delete_bitmap_calculator.h   |   1 -
 be/src/olap/rowset/rowset_tree.cpp       | 276 -------------------
 be/src/olap/rowset/rowset_tree.h         | 139 ----------
 be/src/olap/tablet.cpp                   |   2 -
 be/test/olap/rowset/rowset_tree_test.cpp | 459 -------------------------------
 5 files changed, 877 deletions(-)

diff --git a/be/src/olap/delete_bitmap_calculator.h 
b/be/src/olap/delete_bitmap_calculator.h
index 902db7cb71c..dd17fe7b686 100644
--- a/be/src/olap/delete_bitmap_calculator.h
+++ b/be/src/olap/delete_bitmap_calculator.h
@@ -33,7 +33,6 @@
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_meta.h"
 #include "olap/rowset/rowset_reader.h"
-#include "olap/rowset/rowset_tree.h"
 #include "olap/rowset/segment_v2/segment.h"
 #include "olap/tablet_meta.h"
 #include "olap/tablet_schema.h"
diff --git a/be/src/olap/rowset/rowset_tree.cpp 
b/be/src/olap/rowset/rowset_tree.cpp
deleted file mode 100644
index 8ac153435fc..00000000000
--- a/be/src/olap/rowset/rowset_tree.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// This file is copied from
-// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree.cc
-// and modified by Doris
-
-#include "olap/rowset/rowset_tree.h"
-
-#include <gen_cpp/olap_file.pb.h>
-#include <glog/logging.h>
-
-#include <algorithm>
-#include <cstddef>
-#include <functional>
-#include <iterator>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <vector>
-
-#include "gutil/stl_util.h"
-#include "gutil/strings/substitute.h"
-#include "olap/rowset/rowset.h"
-#include "olap/rowset/rowset_meta.h"
-#include "util/interval_tree-inl.h"
-#include "util/interval_tree.h"
-#include "util/slice.h"
-
-using std::shared_ptr;
-using std::string;
-using std::unique_ptr;
-using std::vector;
-
-namespace doris {
-
-namespace {
-
-// Lexicographic, first by slice, then by rowset pointer, then by segment id, 
then by start/stop
-bool RSEndpointBySliceCompare(const RowsetTree::RSEndpoint& a, const 
RowsetTree::RSEndpoint& b) {
-    int slice_cmp = a.slice_.compare(b.slice_);
-    if (slice_cmp) return slice_cmp < 0;
-    ptrdiff_t rs_cmp = a.rowset_.get() - b.rowset_.get();
-    if (rs_cmp) return rs_cmp < 0;
-    int seg_cmp = a.segment_id_ < b.segment_id_;
-    if (seg_cmp) return seg_cmp < 0;
-    if (a.endpoint_ != b.endpoint_) return a.endpoint_ == RowsetTree::START;
-    return false;
-}
-
-// Wrapper used when making batch queries into the interval tree.
-struct QueryStruct {
-    // The slice of the operation performing the query.
-    Slice slice;
-    // The original index of this slice in the incoming batch.
-    int idx;
-};
-
-} // anonymous namespace
-
-// Entry for use in the interval tree.
-struct RowsetWithBounds {
-    string min_key;
-    string max_key;
-
-    // NOTE: the ordering of struct fields here is purposeful: we access
-    // min_key and max_key frequently, so putting them first in the struct
-    // ensures they fill a single 64-byte cache line (each is 32 bytes).
-    // The 'rowset' pointer is accessed comparitively rarely.
-    RowsetSharedPtr rowset;
-    int32_t segment_id;
-};
-
-// Traits struct for IntervalTree.
-struct RowsetIntervalTraits {
-    typedef Slice point_type;
-    typedef RowsetWithBounds* interval_type;
-
-    static Slice get_left(const RowsetWithBounds* rs) { return 
Slice(rs->min_key); }
-
-    static Slice get_right(const RowsetWithBounds* rs) { return 
Slice(rs->max_key); }
-
-    static int compare(const Slice& a, const Slice& b) { return a.compare(b); }
-
-    static int compare(const Slice& a, const QueryStruct& b) { return 
a.compare(b.slice); }
-
-    static int compare(const QueryStruct& a, const Slice& b) { return 
-compare(b, a); }
-
-    // When 'a' is std::nullopt:
-    //  (1)'a' is +OO when 'positive_direction' is true;
-    //  (2)'a' is -OO when 'positive_direction' is false.
-    static int compare(const std::optional<Slice>& a, const Slice& b, const 
EndpointIfNone& type) {
-        if (a == std::nullopt) {
-            return ((POSITIVE_INFINITY == type) ? 1 : -1);
-        }
-
-        return compare(*a, b);
-    }
-
-    static int compare(const Slice& a, const std::optional<Slice>& b, const 
EndpointIfNone& type) {
-        return -compare(b, a, type);
-    }
-};
-
-RowsetTree::RowsetTree() : initted_(false) {}
-
-Status RowsetTree::Init(const RowsetVector& rowsets) {
-    if (initted_) {
-        return Status::InternalError("Call Init method on a RowsetTree that's 
already inited!");
-    }
-    std::vector<RowsetWithBounds*> entries;
-    ElementDeleter deleter(&entries);
-    entries.reserve(rowsets.size());
-    std::vector<RSEndpoint> endpoints;
-    endpoints.reserve(rowsets.size() * 2);
-
-    // Iterate over each of the provided Rowsets, fetching their
-    // bounds and adding them to the local vectors.
-    for (const RowsetSharedPtr& rs : rowsets) {
-        std::vector<KeyBoundsPB> segments_key_bounds;
-        Status s = rs->get_segments_key_bounds(&segments_key_bounds);
-        if (!s.ok()) {
-            LOG(WARNING) << "Unable to construct RowsetTree: " << 
rs->rowset_id()
-                         << " unable to determine its bounds: " << 
s.to_string();
-            return s;
-        }
-        DCHECK_EQ(segments_key_bounds.size(), rs->num_segments());
-
-        for (auto i = 0; i < rs->num_segments(); i++) {
-            unique_ptr<RowsetWithBounds> rsit(new RowsetWithBounds());
-            rsit->rowset = rs;
-            rsit->segment_id = i;
-            string min_key = segments_key_bounds[i].min_key();
-            string max_key = segments_key_bounds[i].max_key();
-            DCHECK_LE(min_key.compare(max_key), 0)
-                    << "Rowset min: " << min_key << " must be <= max: " << 
max_key;
-
-            // Load bounds and save entry
-            rsit->min_key = std::move(min_key);
-            rsit->max_key = std::move(max_key);
-
-            // Load into key endpoints.
-            endpoints.emplace_back(rsit->rowset, i, START, rsit->min_key);
-            endpoints.emplace_back(rsit->rowset, i, STOP, rsit->max_key);
-
-            entries.push_back(rsit.release());
-        }
-    }
-
-    // Sort endpoints
-    std::sort(endpoints.begin(), endpoints.end(), RSEndpointBySliceCompare);
-
-    // Install the vectors into the object.
-    entries_.swap(entries);
-    tree_.reset(new IntervalTree<RowsetIntervalTraits>(entries_));
-    key_endpoints_.swap(endpoints);
-    all_rowsets_.assign(rowsets.begin(), rowsets.end());
-
-    // Build the mapping from RS_ID to RS.
-    rs_by_id_.clear();
-    for (auto& rs : all_rowsets_) {
-        if (!rs_by_id_.insert({rs->rowset_id(), rs}).second) {
-            return Status::InternalError(strings::Substitute(
-                    "Add rowset with $0 to rowset tree of tablet $1 failed",
-                    rs->rowset_id().to_string(), 
rs->rowset_meta()->tablet_uid().to_string()));
-        }
-    }
-
-    initted_ = true;
-
-    return Status::OK();
-}
-
-void RowsetTree::FindRowsetsIntersectingInterval(
-        const std::optional<Slice>& lower_bound, const std::optional<Slice>& 
upper_bound,
-        vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const {
-    DCHECK(initted_);
-
-    vector<RowsetWithBounds*> from_tree;
-    from_tree.reserve(all_rowsets_.size());
-    tree_->FindIntersectingInterval(lower_bound, upper_bound, &from_tree);
-    rowsets->reserve(rowsets->size() + from_tree.size());
-    for (RowsetWithBounds* rs : from_tree) {
-        rowsets->emplace_back(rs->rowset, rs->segment_id);
-    }
-}
-
-void RowsetTree::FindRowsetsWithKeyInRange(
-        const Slice& encoded_key, const RowsetIdUnorderedSet* rowset_ids,
-        vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const {
-    DCHECK(initted_);
-
-    // Query the interval tree to efficiently find rowsets with known bounds
-    // whose ranges overlap the probe key.
-    vector<RowsetWithBounds*> from_tree;
-    from_tree.reserve(all_rowsets_.size());
-    tree_->FindContainingPoint(encoded_key, &from_tree);
-    rowsets->reserve(rowsets->size() + from_tree.size());
-    for (RowsetWithBounds* rs : from_tree) {
-        if (!rowset_ids || rowset_ids->find(rs->rowset->rowset_id()) != 
rowset_ids->end()) {
-            rowsets->emplace_back(rs->rowset, rs->segment_id);
-        }
-    }
-}
-
-void RowsetTree::ForEachRowsetContainingKeys(
-        const std::vector<Slice>& encoded_keys,
-        const std::function<void(RowsetSharedPtr, int)>& cb) const {
-    DCHECK(std::is_sorted(encoded_keys.cbegin(), encoded_keys.cend(), 
Slice::Comparator()));
-    // The interval tree batch query callback would naturally just give us back
-    // the matching Slices, but that won't allow us to easily tell the caller
-    // which specific operation _index_ matched the Rowset. So, we make a 
vector
-    // of QueryStructs to pair the Slice with its original index.
-    vector<QueryStruct> queries;
-    queries.resize(encoded_keys.size());
-    for (int i = 0; i < encoded_keys.size(); i++) {
-        queries[i] = {encoded_keys[i], i};
-    }
-
-    tree_->ForEachIntervalContainingPoints(
-            queries, [&](const QueryStruct& qs, RowsetWithBounds* rs) { 
cb(rs->rowset, qs.idx); });
-}
-
-RowsetTree::~RowsetTree() {
-    for (RowsetWithBounds* e : entries_) {
-        delete e;
-    }
-    entries_.clear();
-}
-
-void ModifyRowSetTree(const RowsetTree& old_tree, const RowsetVector& 
rowsets_to_remove,
-                      const RowsetVector& rowsets_to_add, RowsetTree* 
new_tree) {
-    RowsetVector post_swap;
-
-    // O(n^2) diff algorithm to collect the set of rowsets excluding
-    // the rowsets that were included in the compaction
-    int num_removed = 0;
-
-    for (const RowsetSharedPtr& rs : old_tree.all_rowsets()) {
-        // Determine if it should be removed
-        bool should_remove = false;
-        for (const RowsetSharedPtr& to_remove : rowsets_to_remove) {
-            if (to_remove->rowset_id() == rs->rowset_id()) {
-                should_remove = true;
-                num_removed++;
-                break;
-            }
-        }
-        if (!should_remove) {
-            post_swap.push_back(rs);
-        }
-    }
-
-    CHECK_EQ(num_removed, rowsets_to_remove.size());
-
-    // Then push the new rowsets on the end of the new list
-    std::copy(rowsets_to_add.begin(), rowsets_to_add.end(), 
std::back_inserter(post_swap));
-
-    CHECK(new_tree->Init(post_swap).ok());
-}
-
-} // namespace doris
diff --git a/be/src/olap/rowset/rowset_tree.h b/be/src/olap/rowset/rowset_tree.h
deleted file mode 100644
index 5ebabca0f1e..00000000000
--- a/be/src/olap/rowset/rowset_tree.h
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// This file is copied from
-// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree.h
-// and modified by Doris
-
-#pragma once
-
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <optional>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "common/status.h"
-#include "gutil/map-util.h"
-#include "olap/olap_common.h"
-#include "olap/rowset/rowset.h"
-#include "util/slice.h"
-
-namespace doris {
-
-template <class Traits>
-class IntervalTree;
-struct RowsetIntervalTraits;
-struct RowsetWithBounds;
-
-// Used often enough, may as well typedef it.
-typedef std::vector<RowsetSharedPtr> RowsetVector;
-
-// Class which encapsulates the set of rowsets which are active for a given
-// Tablet. This provides efficient lookup by key for Rowsets which may overlap
-// that key range.
-//
-// Additionally, the rowset tree maintains information about the implicit
-// intervals generated by the row sets (for instance, if a tablet has
-// rowsets [0, 2] and [1, 3] it has three implicit contiguous intervals:
-// [0, 1], [1, 2], and [2, 3].
-class RowsetTree {
-public:
-    // An RSEndpoint is a POD which associates a rowset, an EndpointType
-    // (either the START or STOP of an interval), and the key at which the
-    // endpoint is located.
-    enum EndpointType { START, STOP };
-    struct RSEndpoint {
-        RSEndpoint(RowsetSharedPtr rowset, uint32_t segment_id, EndpointType 
endpoint, Slice slice)
-                : rowset_(rowset), segment_id_(segment_id), 
endpoint_(endpoint), slice_(slice) {}
-
-        RowsetSharedPtr rowset_;
-        uint32_t segment_id_;
-        enum EndpointType endpoint_;
-        Slice slice_;
-    };
-
-    RowsetTree();
-    Status Init(const RowsetVector& rowsets);
-    ~RowsetTree();
-
-    // Return Rowsets whose id in rowset_ids and range may contain the given 
encoded key.
-    //
-    // The returned pointers are guaranteed to be valid at least until this
-    // RowsetTree object is Reset().
-    void FindRowsetsWithKeyInRange(const Slice& encoded_key, const 
RowsetIdUnorderedSet* rowset_ids,
-                                   vector<std::pair<RowsetSharedPtr, 
int32_t>>* rowsets) const;
-
-    // Call 'cb(rowset, index)' for each (rowset, index) pair such that
-    // 'encoded_keys[index]' may be within the bounds of 'rowset'.
-    //
-    // See IntervalTree::ForEachIntervalContainingPoints for additional
-    // information on the particular order in which the callback will be 
called.
-    //
-    // REQUIRES: 'encoded_keys' must be in sorted order.
-    void ForEachRowsetContainingKeys(const std::vector<Slice>& encoded_keys,
-                                     const std::function<void(RowsetSharedPtr, 
int)>& cb) const;
-
-    // When 'lower_bound' is boost::none, it means negative infinity.
-    // When 'upper_bound' is boost::none, it means positive infinity.
-    // So the query interval can be one of below:
-    //  [-OO, +OO)
-    //  [-OO, upper_bound)
-    //  [lower_bound, +OO)
-    //  [lower_bound, upper_bound)
-    void FindRowsetsIntersectingInterval(
-            const std::optional<Slice>& lower_bound, const 
std::optional<Slice>& upper_bound,
-            vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const;
-
-    const RowsetVector& all_rowsets() const { return all_rowsets_; }
-
-    RowsetSharedPtr rs_by_id(RowsetId rs_id) const { return 
FindPtrOrNull(rs_by_id_, rs_id); }
-
-    // Iterates over RowsetTree::RSEndpoint, guaranteed to be ordered and for
-    // any rowset to appear exactly twice, once at its start slice and once at
-    // its stop slice, equivalent to its GetBounds() values.
-    const std::vector<RSEndpoint>& key_endpoints() const { return 
key_endpoints_; }
-
-private:
-    // Interval tree of the rowsets. Used to efficiently find rowsets which 
might contain
-    // a probe row.
-    std::unique_ptr<IntervalTree<RowsetIntervalTraits>> tree_;
-
-    // Ordered map of all the interval endpoints, holding the implicit 
contiguous
-    // intervals
-    std::vector<RSEndpoint> key_endpoints_;
-
-    // Container for all of the entries in tree_. IntervalTree does
-    // not itself manage memory, so this provides a simple way to enumerate
-    // all the entry structs and free them in the destructor.
-    std::vector<RowsetWithBounds*> entries_;
-
-    // All of the rowsets which were put in this RowsetTree.
-    RowsetVector all_rowsets_;
-
-    // The Rowsets in this RowsetTree, keyed by their id.
-    std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId> rs_by_id_;
-
-    bool initted_;
-};
-
-void ModifyRowSetTree(const RowsetTree& old_tree, const RowsetVector& 
rowsets_to_remove,
-                      const RowsetVector& rowsets_to_add, RowsetTree* 
new_tree);
-
-} // namespace doris
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index b4fbb76ce52..4cc03013091 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -305,7 +305,6 @@ Status Tablet::_init_once_action() {
                     _tablet_meta->compaction_policy());
 #endif
 
-    RowsetVector rowset_vec;
     for (const auto& rs_meta : _tablet_meta->all_rs_metas()) {
         Version version = rs_meta->version();
         RowsetSharedPtr rowset;
@@ -317,7 +316,6 @@ Status Tablet::_init_once_action() {
                          << ", res=" << res;
             return res;
         }
-        rowset_vec.push_back(rowset);
         _rs_version_map[version] = std::move(rowset);
     }
 
diff --git a/be/test/olap/rowset/rowset_tree_test.cpp 
b/be/test/olap/rowset/rowset_tree_test.cpp
deleted file mode 100644
index a6f7965f447..00000000000
--- a/be/test/olap/rowset/rowset_tree_test.cpp
+++ /dev/null
@@ -1,459 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// This file is copied from
-// 
https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree-test.cc
-// and modified by Doris
-
-#include "olap/rowset/rowset_tree.h"
-
-#include <gen_cpp/olap_file.pb.h>
-#include <glog/logging.h>
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-param-test.h>
-#include <gtest/gtest-test-part.h>
-
-#include <algorithm>
-#include <cstdlib>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <tuple>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "gtest/gtest_pred_impl.h"
-#include "gutil/map-util.h"
-#include "gutil/stringprintf.h"
-#include "gutil/strings/substitute.h"
-#include "olap/rowset/rowset.h"
-#include "olap/rowset/rowset_meta.h"
-#include "olap/rowset/unique_rowset_id_generator.h"
-#include "olap/tablet_schema.h"
-#include "testutil/mock_rowset.h"
-#include "testutil/test_util.h"
-#include "util/slice.h"
-#include "util/stopwatch.hpp"
-
-using std::make_shared;
-using std::shared_ptr;
-using std::string;
-using std::unordered_set;
-using std::vector;
-using strings::Substitute;
-
-namespace doris {
-
-class TestRowsetTree : public testing::Test {
-public:
-    TestRowsetTree() : rowset_id_generator_({0, 0}) {}
-
-    void SetUp() {
-        schema_ = std::make_shared<TabletSchema>();
-        TabletSchemaPB schema_pb;
-        schema_pb.set_keys_type(UNIQUE_KEYS);
-        schema_->init_from_pb(schema_pb);
-    }
-
-    // Generates random rowsets with keys between 0 and 10000
-    RowsetVector GenerateRandomRowsets(int num_sets) {
-        RowsetVector vec;
-        for (int i = 0; i < num_sets; i++) {
-            int min = rand() % 9000;
-            int max = min + 1000;
-            vec.push_back(create_rowset(StringPrintf("%04d", min), 
StringPrintf("%04d", max)));
-        }
-        return vec;
-    }
-
-    RowsetSharedPtr create_rowset(const string& min_key, const string& max_key,
-                                  bool is_mem_rowset = false) {
-        RowsetMetaPB rs_meta_pb;
-        
rs_meta_pb.set_rowset_id_v2(rowset_id_generator_.next_id().to_string());
-        rs_meta_pb.set_num_segments(1);
-        KeyBoundsPB key_bounds;
-        key_bounds.set_min_key(min_key);
-        key_bounds.set_max_key(max_key);
-        KeyBoundsPB* new_key_bounds = rs_meta_pb.add_segments_key_bounds();
-        *new_key_bounds = key_bounds;
-        RowsetMetaSharedPtr meta_ptr = make_shared<RowsetMeta>();
-        meta_ptr->init_from_pb(rs_meta_pb);
-        RowsetSharedPtr res_ptr;
-        static_cast<void>(MockRowset::create_rowset(schema_, meta_ptr, 
&res_ptr, is_mem_rowset));
-        return res_ptr;
-    }
-
-private:
-    TabletSchemaSPtr schema_;
-    std::string rowset_path_;
-    UniqueRowsetIdGenerator rowset_id_generator_;
-};
-
-TEST_F(TestRowsetTree, TestTree) {
-    RowsetIdUnorderedSet rowset_ids;
-    RowsetVector vec;
-    auto rowset1 = create_rowset("0", "5");
-    vec.push_back(rowset1);
-    rowset_ids.insert(rowset1->rowset_id());
-
-    auto rowset2 = create_rowset("3", "5");
-    vec.push_back(rowset2);
-    rowset_ids.insert(rowset2->rowset_id());
-
-    auto rowset3 = create_rowset("5", "9");
-    vec.push_back(rowset3);
-    rowset_ids.insert(rowset3->rowset_id());
-
-    auto rowset4 = create_rowset("0", "0", true);
-    vec.push_back(rowset4);
-    rowset_ids.insert(rowset4->rowset_id());
-
-    RowsetTree tree;
-    ASSERT_FALSE(tree.Init(vec).ok());
-
-    vec.erase(vec.begin() + 3);
-    ASSERT_TRUE(tree.Init(vec).ok());
-
-    // "2" overlaps 0-5
-    vector<std::pair<RowsetSharedPtr, int32_t>> out;
-    tree.FindRowsetsWithKeyInRange("2", &rowset_ids, &out);
-    ASSERT_EQ(1, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-
-    // "4" overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsWithKeyInRange("4", &rowset_ids, &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // interval [3,4) overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("3"), Slice("4"), &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // interval [0,2) overlaps 0-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("0"), Slice("2"), &out);
-    ASSERT_EQ(1, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-
-    // interval [5,7) overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("5"), Slice("7"), &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-
-    // "3" overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsWithKeyInRange("3", &rowset_ids, &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // "5" overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsWithKeyInRange("5", &rowset_ids, &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-
-    // interval [0,5) overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("0"), Slice("5"), &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // interval [3,5) overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("3"), Slice("5"), &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // interval [-OO,3) overlaps 0-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("3"), &out);
-    ASSERT_EQ(1, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-
-    // interval [-OO,5) overlaps 0-5, 3-5
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("5"), &out);
-    ASSERT_EQ(2, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-
-    // interval [-OO,99) overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("99"), &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-
-    // interval [6,+OO) overlaps 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("6"), std::nullopt, &out);
-    ASSERT_EQ(1, out.size());
-    ASSERT_EQ(vec[2].get(), out[0].first.get());
-
-    // interval [5,+OO) overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("5"), std::nullopt, &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-
-    // interval [4,+OO) overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(Slice("4"), std::nullopt, &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-
-    // interval [-OO,+OO) overlaps 0-5, 3-5, 5-9
-    out.clear();
-    tree.FindRowsetsIntersectingInterval(std::nullopt, std::nullopt, &out);
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(vec[0].get(), out[0].first.get());
-    ASSERT_EQ(vec[1].get(), out[1].first.get());
-    ASSERT_EQ(vec[2].get(), out[2].first.get());
-}
-
-TEST_F(TestRowsetTree, TestTreeRandomized) {
-    enum BoundOperator {
-        BOUND_LESS_THAN,
-        BOUND_LESS_EQUAL,
-        BOUND_GREATER_THAN,
-        BOUND_GREATER_EQUAL,
-        BOUND_EQUAL
-    };
-    const auto& GetStringPair = [](const BoundOperator op, int start, int 
range_length) {
-        while (true) {
-            string s1 = Substitute("$0", rand_rng_int(start, start + 
range_length));
-            string s2 = Substitute("$0", rand_rng_int(start, start + 
range_length));
-            int r = strcmp(s1.c_str(), s2.c_str());
-            switch (op) {
-            case BOUND_LESS_THAN:
-                if (r == 0) continue;
-                [[fallthrough]];
-            case BOUND_LESS_EQUAL:
-                return std::pair<string, string>(std::min(s1, s2), 
std::max(s1, s2));
-            case BOUND_GREATER_THAN:
-                if (r == 0) continue;
-                [[fallthrough]];
-            case BOUND_GREATER_EQUAL:
-                return std::pair<string, string>(std::max(s1, s2), 
std::min(s1, s2));
-            case BOUND_EQUAL:
-                return std::pair<string, string>(s1, s1);
-            }
-        }
-    };
-
-    RowsetVector vec;
-    for (int i = 0; i < 100; i++) {
-        std::pair<string, string> bound = GetStringPair(BOUND_LESS_EQUAL, 
1000, 900);
-        ASSERT_LE(bound.first, bound.second);
-        vec.push_back(shared_ptr<Rowset>(create_rowset(bound.first, 
bound.second)));
-    }
-    RowsetTree tree;
-    ASSERT_TRUE(tree.Init(vec).ok());
-
-    // When lower < upper.
-    vector<std::pair<RowsetSharedPtr, int32_t>> out;
-    for (int i = 0; i < 100; i++) {
-        out.clear();
-        std::pair<string, string> bound = GetStringPair(BOUND_LESS_THAN, 1000, 
900);
-        ASSERT_LT(bound.first, bound.second);
-        tree.FindRowsetsIntersectingInterval(Slice(bound.first), 
Slice(bound.second), &out);
-        for (const auto& e : out) {
-            std::vector<KeyBoundsPB> segments_key_bounds;
-            
static_cast<void>(e.first->get_segments_key_bounds(&segments_key_bounds));
-            ASSERT_EQ(1, segments_key_bounds.size());
-            string min = segments_key_bounds[0].min_key();
-            string max = segments_key_bounds[0].max_key();
-            if (min < bound.first) {
-                ASSERT_GE(max, bound.first);
-            } else {
-                ASSERT_LT(min, bound.second);
-            }
-            if (max >= bound.second) {
-                ASSERT_LT(min, bound.second);
-            } else {
-                ASSERT_GE(max, bound.first);
-            }
-        }
-    }
-
-    // Remove 50 rowsets, add 10 new rowsets, with non overlapping key range.
-    RowsetVector vec_to_del(vec.begin(), vec.begin() + 50);
-    RowsetVector vec_to_add;
-    for (int i = 0; i < 10; i++) {
-        std::pair<string, string> bound = GetStringPair(BOUND_LESS_EQUAL, 
2000, 900);
-        ASSERT_LE(bound.first, bound.second);
-        vec_to_add.push_back(shared_ptr<Rowset>(create_rowset(bound.first, 
bound.second)));
-    }
-
-    RowsetTree new_tree;
-    ModifyRowSetTree(tree, vec_to_del, vec_to_add, &new_tree);
-
-    // only 50 rowsets left in old key range "1000"-"1900"
-    out.clear();
-    new_tree.FindRowsetsIntersectingInterval(Slice("1000"), Slice("1999"), 
&out);
-    ASSERT_EQ(50, out.size());
-    // should get 10 new added rowsets with key range "2000"-"2900"
-    out.clear();
-    new_tree.FindRowsetsIntersectingInterval(Slice("2000"), Slice("2999"), 
&out);
-    ASSERT_EQ(10, out.size());
-    out.clear();
-    new_tree.FindRowsetsIntersectingInterval(Slice("1000"), Slice("2999"), 
&out);
-    ASSERT_EQ(60, out.size());
-}
-
-class TestRowsetTreePerformance : public TestRowsetTree,
-                                  public 
testing::WithParamInterface<std::tuple<int, int>> {};
-INSTANTIATE_TEST_SUITE_P(Parameters, TestRowsetTreePerformance,
-                         testing::Combine(
-                                 // Number of rowsets.
-                                 // Up to 500 rowsets (500*32MB = 16GB tablet)
-                                 testing::Values(10, 100, 250, 500),
-                                 // Number of query points in a batch.
-                                 testing::Values(10, 100, 500, 1000, 5000)));
-
-TEST_P(TestRowsetTreePerformance, TestPerformance) {
-    const int kNumRowsets = std::get<0>(GetParam());
-    const int kNumQueries = std::get<1>(GetParam());
-    const int kNumIterations = AllowSlowTests() ? 1000 : 10;
-
-    MonotonicStopWatch one_at_time_timer;
-    MonotonicStopWatch batch_timer;
-    RowsetIdUnorderedSet rowset_ids;
-    for (int i = 0; i < kNumIterations; i++) {
-        rowset_ids.clear();
-        // Create a bunch of rowsets, each of which spans about 10% of the 
"row space".
-        // The row space here is 4-digit 0-padded numbers.
-        RowsetVector vec = GenerateRandomRowsets(kNumRowsets);
-        for (auto rowset : vec) {
-            rowset_ids.insert(rowset->rowset_id());
-        }
-
-        RowsetTree tree;
-        ASSERT_TRUE(tree.Init(vec).ok());
-
-        vector<string> queries;
-        for (int j = 0; j < kNumQueries; j++) {
-            int query = rand_rng_int(0, 10000);
-            queries.emplace_back(StringPrintf("%04d", query));
-        }
-
-        int individual_matches = 0;
-        one_at_time_timer.start();
-        {
-            vector<std::pair<RowsetSharedPtr, int32_t>> out;
-            for (const auto& q : queries) {
-                out.clear();
-                tree.FindRowsetsWithKeyInRange(Slice(q), &rowset_ids, &out);
-                individual_matches += out.size();
-            }
-        }
-        one_at_time_timer.stop();
-
-        vector<Slice> query_slices;
-        for (const auto& q : queries) {
-            query_slices.emplace_back(q);
-        }
-
-        batch_timer.start();
-        std::sort(query_slices.begin(), query_slices.end(), 
Slice::Comparator());
-        int bulk_matches = 0;
-        {
-            tree.ForEachRowsetContainingKeys(
-                    query_slices, [&](RowsetSharedPtr rs, int slice_idx) { 
bulk_matches++; });
-        }
-        batch_timer.stop();
-
-        ASSERT_EQ(bulk_matches, individual_matches);
-    }
-
-    double batch_total = batch_timer.elapsed_time();
-    double oat_total = one_at_time_timer.elapsed_time();
-    const string& case_desc = StringPrintf("Q=% 5d R=% 5d", kNumQueries, 
kNumRowsets);
-    LOG(INFO) << StringPrintf("%s %10s %d ms", case_desc.c_str(), "1-by-1",
-                              static_cast<int>(oat_total / 1e6));
-    LOG(INFO) << StringPrintf("%s %10s %d ms (%.2fx)", case_desc.c_str(), 
"batched",
-                              static_cast<int>(batch_total / 1e6),
-                              batch_total ? (oat_total / batch_total) : 0);
-}
-
-TEST_F(TestRowsetTree, TestEndpointsConsistency) {
-    const int kNumRowsets = 1000;
-    RowsetVector vec = GenerateRandomRowsets(kNumRowsets);
-    // Add pathological one-key rows
-    for (int i = 0; i < 10; ++i) {
-        vec.push_back(create_rowset(StringPrintf("%04d", 11000), 
StringPrintf("%04d", 11000)));
-    }
-    vec.push_back(create_rowset(StringPrintf("%04d", 12000), 
StringPrintf("%04d", 12000)));
-    // Make tree
-    RowsetTree tree;
-    ASSERT_TRUE(tree.Init(vec).ok());
-    // Keep track of "currently open" intervals defined by the endpoints
-    unordered_set<RowsetSharedPtr> open;
-    // Keep track of all rowsets that have been visited
-    unordered_set<RowsetSharedPtr> visited;
-
-    Slice prev;
-    for (const RowsetTree::RSEndpoint& rse : tree.key_endpoints()) {
-        RowsetSharedPtr rs = rse.rowset_;
-        enum RowsetTree::EndpointType ept = rse.endpoint_;
-        const Slice& slice = rse.slice_;
-
-        ASSERT_TRUE(rs != nullptr) << "RowsetTree has an endpoint with no 
rowset";
-        ASSERT_TRUE(!slice.empty()) << "RowsetTree has an endpoint with no 
key";
-
-        if (!prev.empty()) {
-            ASSERT_LE(prev.compare(slice), 0);
-        }
-
-        std::vector<KeyBoundsPB> segments_key_bounds;
-        ASSERT_TRUE(rs->get_segments_key_bounds(&segments_key_bounds).ok());
-        ASSERT_EQ(1, segments_key_bounds.size());
-        string min = segments_key_bounds[0].min_key();
-        string max = segments_key_bounds[0].max_key();
-        if (ept == RowsetTree::START) {
-            ASSERT_EQ(min, slice.to_string());
-            ASSERT_TRUE(InsertIfNotPresent(&open, rs));
-            ASSERT_TRUE(InsertIfNotPresent(&visited, rs));
-        } else if (ept == RowsetTree::STOP) {
-            ASSERT_EQ(max, slice.to_string());
-            ASSERT_TRUE(open.erase(rs) == 1);
-        } else {
-            FAIL() << "No such endpoint type exists";
-        }
-    }
-}
-
-} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to