This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8bd06aff7ed [Chore](MoW) remove unused code about rowset tree (#26282) 8bd06aff7ed is described below commit 8bd06aff7ed07f4c9c0d71163122d7700c031228 Author: Xin Liao <liaoxin...@126.com> AuthorDate: Thu Nov 2 20:25:27 2023 +0800 [Chore](MoW) remove unused code about rowset tree (#26282) --- be/src/olap/delete_bitmap_calculator.h | 1 - be/src/olap/rowset/rowset_tree.cpp | 276 ------------------- be/src/olap/rowset/rowset_tree.h | 139 ---------- be/src/olap/tablet.cpp | 2 - be/test/olap/rowset/rowset_tree_test.cpp | 459 ------------------------------- 5 files changed, 877 deletions(-) diff --git a/be/src/olap/delete_bitmap_calculator.h b/be/src/olap/delete_bitmap_calculator.h index 902db7cb71c..dd17fe7b686 100644 --- a/be/src/olap/delete_bitmap_calculator.h +++ b/be/src/olap/delete_bitmap_calculator.h @@ -33,7 +33,6 @@ #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_meta.h" #include "olap/rowset/rowset_reader.h" -#include "olap/rowset/rowset_tree.h" #include "olap/rowset/segment_v2/segment.h" #include "olap/tablet_meta.h" #include "olap/tablet_schema.h" diff --git a/be/src/olap/rowset/rowset_tree.cpp b/be/src/olap/rowset/rowset_tree.cpp deleted file mode 100644 index 8ac153435fc..00000000000 --- a/be/src/olap/rowset/rowset_tree.cpp +++ /dev/null @@ -1,276 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// -// This file is copied from -// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree.cc -// and modified by Doris - -#include "olap/rowset/rowset_tree.h" - -#include <gen_cpp/olap_file.pb.h> -#include <glog/logging.h> - -#include <algorithm> -#include <cstddef> -#include <functional> -#include <iterator> -#include <memory> -#include <ostream> -#include <string> -#include <vector> - -#include "gutil/stl_util.h" -#include "gutil/strings/substitute.h" -#include "olap/rowset/rowset.h" -#include "olap/rowset/rowset_meta.h" -#include "util/interval_tree-inl.h" -#include "util/interval_tree.h" -#include "util/slice.h" - -using std::shared_ptr; -using std::string; -using std::unique_ptr; -using std::vector; - -namespace doris { - -namespace { - -// Lexicographic, first by slice, then by rowset pointer, then by segment id, then by start/stop -bool RSEndpointBySliceCompare(const RowsetTree::RSEndpoint& a, const RowsetTree::RSEndpoint& b) { - int slice_cmp = a.slice_.compare(b.slice_); - if (slice_cmp) return slice_cmp < 0; - ptrdiff_t rs_cmp = a.rowset_.get() - b.rowset_.get(); - if (rs_cmp) return rs_cmp < 0; - int seg_cmp = a.segment_id_ < b.segment_id_; - if (seg_cmp) return seg_cmp < 0; - if (a.endpoint_ != b.endpoint_) return a.endpoint_ == RowsetTree::START; - return false; -} - -// Wrapper used when making batch queries into the interval tree. -struct QueryStruct { - // The slice of the operation performing the query. - Slice slice; - // The original index of this slice in the incoming batch. - int idx; -}; - -} // anonymous namespace - -// Entry for use in the interval tree. -struct RowsetWithBounds { - string min_key; - string max_key; - - // NOTE: the ordering of struct fields here is purposeful: we access - // min_key and max_key frequently, so putting them first in the struct - // ensures they fill a single 64-byte cache line (each is 32 bytes). - // The 'rowset' pointer is accessed comparitively rarely. - RowsetSharedPtr rowset; - int32_t segment_id; -}; - -// Traits struct for IntervalTree. -struct RowsetIntervalTraits { - typedef Slice point_type; - typedef RowsetWithBounds* interval_type; - - static Slice get_left(const RowsetWithBounds* rs) { return Slice(rs->min_key); } - - static Slice get_right(const RowsetWithBounds* rs) { return Slice(rs->max_key); } - - static int compare(const Slice& a, const Slice& b) { return a.compare(b); } - - static int compare(const Slice& a, const QueryStruct& b) { return a.compare(b.slice); } - - static int compare(const QueryStruct& a, const Slice& b) { return -compare(b, a); } - - // When 'a' is std::nullopt: - // (1)'a' is +OO when 'positive_direction' is true; - // (2)'a' is -OO when 'positive_direction' is false. - static int compare(const std::optional<Slice>& a, const Slice& b, const EndpointIfNone& type) { - if (a == std::nullopt) { - return ((POSITIVE_INFINITY == type) ? 1 : -1); - } - - return compare(*a, b); - } - - static int compare(const Slice& a, const std::optional<Slice>& b, const EndpointIfNone& type) { - return -compare(b, a, type); - } -}; - -RowsetTree::RowsetTree() : initted_(false) {} - -Status RowsetTree::Init(const RowsetVector& rowsets) { - if (initted_) { - return Status::InternalError("Call Init method on a RowsetTree that's already inited!"); - } - std::vector<RowsetWithBounds*> entries; - ElementDeleter deleter(&entries); - entries.reserve(rowsets.size()); - std::vector<RSEndpoint> endpoints; - endpoints.reserve(rowsets.size() * 2); - - // Iterate over each of the provided Rowsets, fetching their - // bounds and adding them to the local vectors. - for (const RowsetSharedPtr& rs : rowsets) { - std::vector<KeyBoundsPB> segments_key_bounds; - Status s = rs->get_segments_key_bounds(&segments_key_bounds); - if (!s.ok()) { - LOG(WARNING) << "Unable to construct RowsetTree: " << rs->rowset_id() - << " unable to determine its bounds: " << s.to_string(); - return s; - } - DCHECK_EQ(segments_key_bounds.size(), rs->num_segments()); - - for (auto i = 0; i < rs->num_segments(); i++) { - unique_ptr<RowsetWithBounds> rsit(new RowsetWithBounds()); - rsit->rowset = rs; - rsit->segment_id = i; - string min_key = segments_key_bounds[i].min_key(); - string max_key = segments_key_bounds[i].max_key(); - DCHECK_LE(min_key.compare(max_key), 0) - << "Rowset min: " << min_key << " must be <= max: " << max_key; - - // Load bounds and save entry - rsit->min_key = std::move(min_key); - rsit->max_key = std::move(max_key); - - // Load into key endpoints. - endpoints.emplace_back(rsit->rowset, i, START, rsit->min_key); - endpoints.emplace_back(rsit->rowset, i, STOP, rsit->max_key); - - entries.push_back(rsit.release()); - } - } - - // Sort endpoints - std::sort(endpoints.begin(), endpoints.end(), RSEndpointBySliceCompare); - - // Install the vectors into the object. - entries_.swap(entries); - tree_.reset(new IntervalTree<RowsetIntervalTraits>(entries_)); - key_endpoints_.swap(endpoints); - all_rowsets_.assign(rowsets.begin(), rowsets.end()); - - // Build the mapping from RS_ID to RS. - rs_by_id_.clear(); - for (auto& rs : all_rowsets_) { - if (!rs_by_id_.insert({rs->rowset_id(), rs}).second) { - return Status::InternalError(strings::Substitute( - "Add rowset with $0 to rowset tree of tablet $1 failed", - rs->rowset_id().to_string(), rs->rowset_meta()->tablet_uid().to_string())); - } - } - - initted_ = true; - - return Status::OK(); -} - -void RowsetTree::FindRowsetsIntersectingInterval( - const std::optional<Slice>& lower_bound, const std::optional<Slice>& upper_bound, - vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const { - DCHECK(initted_); - - vector<RowsetWithBounds*> from_tree; - from_tree.reserve(all_rowsets_.size()); - tree_->FindIntersectingInterval(lower_bound, upper_bound, &from_tree); - rowsets->reserve(rowsets->size() + from_tree.size()); - for (RowsetWithBounds* rs : from_tree) { - rowsets->emplace_back(rs->rowset, rs->segment_id); - } -} - -void RowsetTree::FindRowsetsWithKeyInRange( - const Slice& encoded_key, const RowsetIdUnorderedSet* rowset_ids, - vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const { - DCHECK(initted_); - - // Query the interval tree to efficiently find rowsets with known bounds - // whose ranges overlap the probe key. - vector<RowsetWithBounds*> from_tree; - from_tree.reserve(all_rowsets_.size()); - tree_->FindContainingPoint(encoded_key, &from_tree); - rowsets->reserve(rowsets->size() + from_tree.size()); - for (RowsetWithBounds* rs : from_tree) { - if (!rowset_ids || rowset_ids->find(rs->rowset->rowset_id()) != rowset_ids->end()) { - rowsets->emplace_back(rs->rowset, rs->segment_id); - } - } -} - -void RowsetTree::ForEachRowsetContainingKeys( - const std::vector<Slice>& encoded_keys, - const std::function<void(RowsetSharedPtr, int)>& cb) const { - DCHECK(std::is_sorted(encoded_keys.cbegin(), encoded_keys.cend(), Slice::Comparator())); - // The interval tree batch query callback would naturally just give us back - // the matching Slices, but that won't allow us to easily tell the caller - // which specific operation _index_ matched the Rowset. So, we make a vector - // of QueryStructs to pair the Slice with its original index. - vector<QueryStruct> queries; - queries.resize(encoded_keys.size()); - for (int i = 0; i < encoded_keys.size(); i++) { - queries[i] = {encoded_keys[i], i}; - } - - tree_->ForEachIntervalContainingPoints( - queries, [&](const QueryStruct& qs, RowsetWithBounds* rs) { cb(rs->rowset, qs.idx); }); -} - -RowsetTree::~RowsetTree() { - for (RowsetWithBounds* e : entries_) { - delete e; - } - entries_.clear(); -} - -void ModifyRowSetTree(const RowsetTree& old_tree, const RowsetVector& rowsets_to_remove, - const RowsetVector& rowsets_to_add, RowsetTree* new_tree) { - RowsetVector post_swap; - - // O(n^2) diff algorithm to collect the set of rowsets excluding - // the rowsets that were included in the compaction - int num_removed = 0; - - for (const RowsetSharedPtr& rs : old_tree.all_rowsets()) { - // Determine if it should be removed - bool should_remove = false; - for (const RowsetSharedPtr& to_remove : rowsets_to_remove) { - if (to_remove->rowset_id() == rs->rowset_id()) { - should_remove = true; - num_removed++; - break; - } - } - if (!should_remove) { - post_swap.push_back(rs); - } - } - - CHECK_EQ(num_removed, rowsets_to_remove.size()); - - // Then push the new rowsets on the end of the new list - std::copy(rowsets_to_add.begin(), rowsets_to_add.end(), std::back_inserter(post_swap)); - - CHECK(new_tree->Init(post_swap).ok()); -} - -} // namespace doris diff --git a/be/src/olap/rowset/rowset_tree.h b/be/src/olap/rowset/rowset_tree.h deleted file mode 100644 index 5ebabca0f1e..00000000000 --- a/be/src/olap/rowset/rowset_tree.h +++ /dev/null @@ -1,139 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// -// This file is copied from -// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree.h -// and modified by Doris - -#pragma once - -#include <cstdint> -#include <functional> -#include <memory> -#include <optional> -#include <unordered_map> -#include <utility> -#include <vector> - -#include "common/status.h" -#include "gutil/map-util.h" -#include "olap/olap_common.h" -#include "olap/rowset/rowset.h" -#include "util/slice.h" - -namespace doris { - -template <class Traits> -class IntervalTree; -struct RowsetIntervalTraits; -struct RowsetWithBounds; - -// Used often enough, may as well typedef it. -typedef std::vector<RowsetSharedPtr> RowsetVector; - -// Class which encapsulates the set of rowsets which are active for a given -// Tablet. This provides efficient lookup by key for Rowsets which may overlap -// that key range. -// -// Additionally, the rowset tree maintains information about the implicit -// intervals generated by the row sets (for instance, if a tablet has -// rowsets [0, 2] and [1, 3] it has three implicit contiguous intervals: -// [0, 1], [1, 2], and [2, 3]. -class RowsetTree { -public: - // An RSEndpoint is a POD which associates a rowset, an EndpointType - // (either the START or STOP of an interval), and the key at which the - // endpoint is located. - enum EndpointType { START, STOP }; - struct RSEndpoint { - RSEndpoint(RowsetSharedPtr rowset, uint32_t segment_id, EndpointType endpoint, Slice slice) - : rowset_(rowset), segment_id_(segment_id), endpoint_(endpoint), slice_(slice) {} - - RowsetSharedPtr rowset_; - uint32_t segment_id_; - enum EndpointType endpoint_; - Slice slice_; - }; - - RowsetTree(); - Status Init(const RowsetVector& rowsets); - ~RowsetTree(); - - // Return Rowsets whose id in rowset_ids and range may contain the given encoded key. - // - // The returned pointers are guaranteed to be valid at least until this - // RowsetTree object is Reset(). - void FindRowsetsWithKeyInRange(const Slice& encoded_key, const RowsetIdUnorderedSet* rowset_ids, - vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const; - - // Call 'cb(rowset, index)' for each (rowset, index) pair such that - // 'encoded_keys[index]' may be within the bounds of 'rowset'. - // - // See IntervalTree::ForEachIntervalContainingPoints for additional - // information on the particular order in which the callback will be called. - // - // REQUIRES: 'encoded_keys' must be in sorted order. - void ForEachRowsetContainingKeys(const std::vector<Slice>& encoded_keys, - const std::function<void(RowsetSharedPtr, int)>& cb) const; - - // When 'lower_bound' is boost::none, it means negative infinity. - // When 'upper_bound' is boost::none, it means positive infinity. - // So the query interval can be one of below: - // [-OO, +OO) - // [-OO, upper_bound) - // [lower_bound, +OO) - // [lower_bound, upper_bound) - void FindRowsetsIntersectingInterval( - const std::optional<Slice>& lower_bound, const std::optional<Slice>& upper_bound, - vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const; - - const RowsetVector& all_rowsets() const { return all_rowsets_; } - - RowsetSharedPtr rs_by_id(RowsetId rs_id) const { return FindPtrOrNull(rs_by_id_, rs_id); } - - // Iterates over RowsetTree::RSEndpoint, guaranteed to be ordered and for - // any rowset to appear exactly twice, once at its start slice and once at - // its stop slice, equivalent to its GetBounds() values. - const std::vector<RSEndpoint>& key_endpoints() const { return key_endpoints_; } - -private: - // Interval tree of the rowsets. Used to efficiently find rowsets which might contain - // a probe row. - std::unique_ptr<IntervalTree<RowsetIntervalTraits>> tree_; - - // Ordered map of all the interval endpoints, holding the implicit contiguous - // intervals - std::vector<RSEndpoint> key_endpoints_; - - // Container for all of the entries in tree_. IntervalTree does - // not itself manage memory, so this provides a simple way to enumerate - // all the entry structs and free them in the destructor. - std::vector<RowsetWithBounds*> entries_; - - // All of the rowsets which were put in this RowsetTree. - RowsetVector all_rowsets_; - - // The Rowsets in this RowsetTree, keyed by their id. - std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId> rs_by_id_; - - bool initted_; -}; - -void ModifyRowSetTree(const RowsetTree& old_tree, const RowsetVector& rowsets_to_remove, - const RowsetVector& rowsets_to_add, RowsetTree* new_tree); - -} // namespace doris diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index b4fbb76ce52..4cc03013091 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -305,7 +305,6 @@ Status Tablet::_init_once_action() { _tablet_meta->compaction_policy()); #endif - RowsetVector rowset_vec; for (const auto& rs_meta : _tablet_meta->all_rs_metas()) { Version version = rs_meta->version(); RowsetSharedPtr rowset; @@ -317,7 +316,6 @@ Status Tablet::_init_once_action() { << ", res=" << res; return res; } - rowset_vec.push_back(rowset); _rs_version_map[version] = std::move(rowset); } diff --git a/be/test/olap/rowset/rowset_tree_test.cpp b/be/test/olap/rowset/rowset_tree_test.cpp deleted file mode 100644 index a6f7965f447..00000000000 --- a/be/test/olap/rowset/rowset_tree_test.cpp +++ /dev/null @@ -1,459 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// -// This file is copied from -// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree-test.cc -// and modified by Doris - -#include "olap/rowset/rowset_tree.h" - -#include <gen_cpp/olap_file.pb.h> -#include <glog/logging.h> -#include <gtest/gtest-message.h> -#include <gtest/gtest-param-test.h> -#include <gtest/gtest-test-part.h> - -#include <algorithm> -#include <cstdlib> -#include <cstring> -#include <memory> -#include <string> -#include <tuple> -#include <unordered_set> -#include <utility> -#include <vector> - -#include "gtest/gtest_pred_impl.h" -#include "gutil/map-util.h" -#include "gutil/stringprintf.h" -#include "gutil/strings/substitute.h" -#include "olap/rowset/rowset.h" -#include "olap/rowset/rowset_meta.h" -#include "olap/rowset/unique_rowset_id_generator.h" -#include "olap/tablet_schema.h" -#include "testutil/mock_rowset.h" -#include "testutil/test_util.h" -#include "util/slice.h" -#include "util/stopwatch.hpp" - -using std::make_shared; -using std::shared_ptr; -using std::string; -using std::unordered_set; -using std::vector; -using strings::Substitute; - -namespace doris { - -class TestRowsetTree : public testing::Test { -public: - TestRowsetTree() : rowset_id_generator_({0, 0}) {} - - void SetUp() { - schema_ = std::make_shared<TabletSchema>(); - TabletSchemaPB schema_pb; - schema_pb.set_keys_type(UNIQUE_KEYS); - schema_->init_from_pb(schema_pb); - } - - // Generates random rowsets with keys between 0 and 10000 - RowsetVector GenerateRandomRowsets(int num_sets) { - RowsetVector vec; - for (int i = 0; i < num_sets; i++) { - int min = rand() % 9000; - int max = min + 1000; - vec.push_back(create_rowset(StringPrintf("%04d", min), StringPrintf("%04d", max))); - } - return vec; - } - - RowsetSharedPtr create_rowset(const string& min_key, const string& max_key, - bool is_mem_rowset = false) { - RowsetMetaPB rs_meta_pb; - rs_meta_pb.set_rowset_id_v2(rowset_id_generator_.next_id().to_string()); - rs_meta_pb.set_num_segments(1); - KeyBoundsPB key_bounds; - key_bounds.set_min_key(min_key); - key_bounds.set_max_key(max_key); - KeyBoundsPB* new_key_bounds = rs_meta_pb.add_segments_key_bounds(); - *new_key_bounds = key_bounds; - RowsetMetaSharedPtr meta_ptr = make_shared<RowsetMeta>(); - meta_ptr->init_from_pb(rs_meta_pb); - RowsetSharedPtr res_ptr; - static_cast<void>(MockRowset::create_rowset(schema_, meta_ptr, &res_ptr, is_mem_rowset)); - return res_ptr; - } - -private: - TabletSchemaSPtr schema_; - std::string rowset_path_; - UniqueRowsetIdGenerator rowset_id_generator_; -}; - -TEST_F(TestRowsetTree, TestTree) { - RowsetIdUnorderedSet rowset_ids; - RowsetVector vec; - auto rowset1 = create_rowset("0", "5"); - vec.push_back(rowset1); - rowset_ids.insert(rowset1->rowset_id()); - - auto rowset2 = create_rowset("3", "5"); - vec.push_back(rowset2); - rowset_ids.insert(rowset2->rowset_id()); - - auto rowset3 = create_rowset("5", "9"); - vec.push_back(rowset3); - rowset_ids.insert(rowset3->rowset_id()); - - auto rowset4 = create_rowset("0", "0", true); - vec.push_back(rowset4); - rowset_ids.insert(rowset4->rowset_id()); - - RowsetTree tree; - ASSERT_FALSE(tree.Init(vec).ok()); - - vec.erase(vec.begin() + 3); - ASSERT_TRUE(tree.Init(vec).ok()); - - // "2" overlaps 0-5 - vector<std::pair<RowsetSharedPtr, int32_t>> out; - tree.FindRowsetsWithKeyInRange("2", &rowset_ids, &out); - ASSERT_EQ(1, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - - // "4" overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsWithKeyInRange("4", &rowset_ids, &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // interval [3,4) overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("3"), Slice("4"), &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // interval [0,2) overlaps 0-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("0"), Slice("2"), &out); - ASSERT_EQ(1, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - - // interval [5,7) overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("5"), Slice("7"), &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); - - // "3" overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsWithKeyInRange("3", &rowset_ids, &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // "5" overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsWithKeyInRange("5", &rowset_ids, &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); - - // interval [0,5) overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("0"), Slice("5"), &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // interval [3,5) overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("3"), Slice("5"), &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // interval [-OO,3) overlaps 0-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("3"), &out); - ASSERT_EQ(1, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - - // interval [-OO,5) overlaps 0-5, 3-5 - out.clear(); - tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("5"), &out); - ASSERT_EQ(2, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - - // interval [-OO,99) overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(std::nullopt, Slice("99"), &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); - - // interval [6,+OO) overlaps 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("6"), std::nullopt, &out); - ASSERT_EQ(1, out.size()); - ASSERT_EQ(vec[2].get(), out[0].first.get()); - - // interval [5,+OO) overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("5"), std::nullopt, &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); - - // interval [4,+OO) overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(Slice("4"), std::nullopt, &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); - - // interval [-OO,+OO) overlaps 0-5, 3-5, 5-9 - out.clear(); - tree.FindRowsetsIntersectingInterval(std::nullopt, std::nullopt, &out); - ASSERT_EQ(3, out.size()); - ASSERT_EQ(vec[0].get(), out[0].first.get()); - ASSERT_EQ(vec[1].get(), out[1].first.get()); - ASSERT_EQ(vec[2].get(), out[2].first.get()); -} - -TEST_F(TestRowsetTree, TestTreeRandomized) { - enum BoundOperator { - BOUND_LESS_THAN, - BOUND_LESS_EQUAL, - BOUND_GREATER_THAN, - BOUND_GREATER_EQUAL, - BOUND_EQUAL - }; - const auto& GetStringPair = [](const BoundOperator op, int start, int range_length) { - while (true) { - string s1 = Substitute("$0", rand_rng_int(start, start + range_length)); - string s2 = Substitute("$0", rand_rng_int(start, start + range_length)); - int r = strcmp(s1.c_str(), s2.c_str()); - switch (op) { - case BOUND_LESS_THAN: - if (r == 0) continue; - [[fallthrough]]; - case BOUND_LESS_EQUAL: - return std::pair<string, string>(std::min(s1, s2), std::max(s1, s2)); - case BOUND_GREATER_THAN: - if (r == 0) continue; - [[fallthrough]]; - case BOUND_GREATER_EQUAL: - return std::pair<string, string>(std::max(s1, s2), std::min(s1, s2)); - case BOUND_EQUAL: - return std::pair<string, string>(s1, s1); - } - } - }; - - RowsetVector vec; - for (int i = 0; i < 100; i++) { - std::pair<string, string> bound = GetStringPair(BOUND_LESS_EQUAL, 1000, 900); - ASSERT_LE(bound.first, bound.second); - vec.push_back(shared_ptr<Rowset>(create_rowset(bound.first, bound.second))); - } - RowsetTree tree; - ASSERT_TRUE(tree.Init(vec).ok()); - - // When lower < upper. - vector<std::pair<RowsetSharedPtr, int32_t>> out; - for (int i = 0; i < 100; i++) { - out.clear(); - std::pair<string, string> bound = GetStringPair(BOUND_LESS_THAN, 1000, 900); - ASSERT_LT(bound.first, bound.second); - tree.FindRowsetsIntersectingInterval(Slice(bound.first), Slice(bound.second), &out); - for (const auto& e : out) { - std::vector<KeyBoundsPB> segments_key_bounds; - static_cast<void>(e.first->get_segments_key_bounds(&segments_key_bounds)); - ASSERT_EQ(1, segments_key_bounds.size()); - string min = segments_key_bounds[0].min_key(); - string max = segments_key_bounds[0].max_key(); - if (min < bound.first) { - ASSERT_GE(max, bound.first); - } else { - ASSERT_LT(min, bound.second); - } - if (max >= bound.second) { - ASSERT_LT(min, bound.second); - } else { - ASSERT_GE(max, bound.first); - } - } - } - - // Remove 50 rowsets, add 10 new rowsets, with non overlapping key range. - RowsetVector vec_to_del(vec.begin(), vec.begin() + 50); - RowsetVector vec_to_add; - for (int i = 0; i < 10; i++) { - std::pair<string, string> bound = GetStringPair(BOUND_LESS_EQUAL, 2000, 900); - ASSERT_LE(bound.first, bound.second); - vec_to_add.push_back(shared_ptr<Rowset>(create_rowset(bound.first, bound.second))); - } - - RowsetTree new_tree; - ModifyRowSetTree(tree, vec_to_del, vec_to_add, &new_tree); - - // only 50 rowsets left in old key range "1000"-"1900" - out.clear(); - new_tree.FindRowsetsIntersectingInterval(Slice("1000"), Slice("1999"), &out); - ASSERT_EQ(50, out.size()); - // should get 10 new added rowsets with key range "2000"-"2900" - out.clear(); - new_tree.FindRowsetsIntersectingInterval(Slice("2000"), Slice("2999"), &out); - ASSERT_EQ(10, out.size()); - out.clear(); - new_tree.FindRowsetsIntersectingInterval(Slice("1000"), Slice("2999"), &out); - ASSERT_EQ(60, out.size()); -} - -class TestRowsetTreePerformance : public TestRowsetTree, - public testing::WithParamInterface<std::tuple<int, int>> {}; -INSTANTIATE_TEST_SUITE_P(Parameters, TestRowsetTreePerformance, - testing::Combine( - // Number of rowsets. - // Up to 500 rowsets (500*32MB = 16GB tablet) - testing::Values(10, 100, 250, 500), - // Number of query points in a batch. - testing::Values(10, 100, 500, 1000, 5000))); - -TEST_P(TestRowsetTreePerformance, TestPerformance) { - const int kNumRowsets = std::get<0>(GetParam()); - const int kNumQueries = std::get<1>(GetParam()); - const int kNumIterations = AllowSlowTests() ? 1000 : 10; - - MonotonicStopWatch one_at_time_timer; - MonotonicStopWatch batch_timer; - RowsetIdUnorderedSet rowset_ids; - for (int i = 0; i < kNumIterations; i++) { - rowset_ids.clear(); - // Create a bunch of rowsets, each of which spans about 10% of the "row space". - // The row space here is 4-digit 0-padded numbers. - RowsetVector vec = GenerateRandomRowsets(kNumRowsets); - for (auto rowset : vec) { - rowset_ids.insert(rowset->rowset_id()); - } - - RowsetTree tree; - ASSERT_TRUE(tree.Init(vec).ok()); - - vector<string> queries; - for (int j = 0; j < kNumQueries; j++) { - int query = rand_rng_int(0, 10000); - queries.emplace_back(StringPrintf("%04d", query)); - } - - int individual_matches = 0; - one_at_time_timer.start(); - { - vector<std::pair<RowsetSharedPtr, int32_t>> out; - for (const auto& q : queries) { - out.clear(); - tree.FindRowsetsWithKeyInRange(Slice(q), &rowset_ids, &out); - individual_matches += out.size(); - } - } - one_at_time_timer.stop(); - - vector<Slice> query_slices; - for (const auto& q : queries) { - query_slices.emplace_back(q); - } - - batch_timer.start(); - std::sort(query_slices.begin(), query_slices.end(), Slice::Comparator()); - int bulk_matches = 0; - { - tree.ForEachRowsetContainingKeys( - query_slices, [&](RowsetSharedPtr rs, int slice_idx) { bulk_matches++; }); - } - batch_timer.stop(); - - ASSERT_EQ(bulk_matches, individual_matches); - } - - double batch_total = batch_timer.elapsed_time(); - double oat_total = one_at_time_timer.elapsed_time(); - const string& case_desc = StringPrintf("Q=% 5d R=% 5d", kNumQueries, kNumRowsets); - LOG(INFO) << StringPrintf("%s %10s %d ms", case_desc.c_str(), "1-by-1", - static_cast<int>(oat_total / 1e6)); - LOG(INFO) << StringPrintf("%s %10s %d ms (%.2fx)", case_desc.c_str(), "batched", - static_cast<int>(batch_total / 1e6), - batch_total ? (oat_total / batch_total) : 0); -} - -TEST_F(TestRowsetTree, TestEndpointsConsistency) { - const int kNumRowsets = 1000; - RowsetVector vec = GenerateRandomRowsets(kNumRowsets); - // Add pathological one-key rows - for (int i = 0; i < 10; ++i) { - vec.push_back(create_rowset(StringPrintf("%04d", 11000), StringPrintf("%04d", 11000))); - } - vec.push_back(create_rowset(StringPrintf("%04d", 12000), StringPrintf("%04d", 12000))); - // Make tree - RowsetTree tree; - ASSERT_TRUE(tree.Init(vec).ok()); - // Keep track of "currently open" intervals defined by the endpoints - unordered_set<RowsetSharedPtr> open; - // Keep track of all rowsets that have been visited - unordered_set<RowsetSharedPtr> visited; - - Slice prev; - for (const RowsetTree::RSEndpoint& rse : tree.key_endpoints()) { - RowsetSharedPtr rs = rse.rowset_; - enum RowsetTree::EndpointType ept = rse.endpoint_; - const Slice& slice = rse.slice_; - - ASSERT_TRUE(rs != nullptr) << "RowsetTree has an endpoint with no rowset"; - ASSERT_TRUE(!slice.empty()) << "RowsetTree has an endpoint with no key"; - - if (!prev.empty()) { - ASSERT_LE(prev.compare(slice), 0); - } - - std::vector<KeyBoundsPB> segments_key_bounds; - ASSERT_TRUE(rs->get_segments_key_bounds(&segments_key_bounds).ok()); - ASSERT_EQ(1, segments_key_bounds.size()); - string min = segments_key_bounds[0].min_key(); - string max = segments_key_bounds[0].max_key(); - if (ept == RowsetTree::START) { - ASSERT_EQ(min, slice.to_string()); - ASSERT_TRUE(InsertIfNotPresent(&open, rs)); - ASSERT_TRUE(InsertIfNotPresent(&visited, rs)); - } else if (ept == RowsetTree::STOP) { - ASSERT_EQ(max, slice.to_string()); - ASSERT_TRUE(open.erase(rs) == 1); - } else { - FAIL() << "No such endpoint type exists"; - } - } -} - -} // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org