This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 139cd3d11a [Improvement] remove olap filters when use in key ranges (#10278) 139cd3d11a is described below commit 139cd3d11acee071293c8badf14b432340e5aaac Author: Gabriel <gabrielleeb...@gmail.com> AuthorDate: Thu Jun 23 09:12:29 2022 +0800 [Improvement] remove olap filters when use in key ranges (#10278) --- be/src/exec/olap_common.h | 12 ++++---- be/src/exec/olap_scan_node.cpp | 53 ++++++++++++++++++----------------- be/src/exec/olap_scan_node.h | 3 +- be/src/vec/exec/volap_scan_node.cpp | 56 ++++++++++++++++++++----------------- be/src/vec/exec/volap_scan_node.h | 3 +- be/test/exec/olap_common_test.cpp | 54 +++++++++++++++++++++++++---------- 6 files changed, 106 insertions(+), 75 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 9eb0e0a3b2..41d8f7aa5c 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -256,7 +256,7 @@ public: _is_convertible(true) {} template <class T> - Status extend_scan_key(ColumnValueRange<T>& range, int32_t max_scan_key_num); + Status extend_scan_key(ColumnValueRange<T>& range, int32_t max_scan_key_num, bool* exact_value); Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* key_range); @@ -738,9 +738,10 @@ bool ColumnValueRange<T>::has_intersection(ColumnValueRange<T>& range) { } template <class T> -Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t max_scan_key_num) { +Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t max_scan_key_num, + bool* exact_value) { using namespace std; - typedef typename set<T>::const_iterator const_iterator_type; + using ConstIterator = typename set<T>::const_iterator; // 1. 
clear ScanKey if some column range is empty if (range.is_empty_value_range()) { @@ -760,6 +761,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t max_sca if (range.get_fixed_value_size() > max_scan_key_num / scan_keys_size) { if (range.is_range_value_convertible()) { range.convert_to_range_value(); + *exact_value = false; } else { return Status::OK(); } @@ -777,7 +779,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t max_sca // 3.1.1 construct num of fixed value ScanKey (begin_key == end_key) if (_begin_scan_keys.empty()) { const set<T>& fixed_value_set = range.get_fixed_value_set(); - const_iterator_type iter = fixed_value_set.begin(); + ConstIterator iter = fixed_value_set.begin(); for (; iter != fixed_value_set.end(); ++iter) { _begin_scan_keys.emplace_back(); @@ -801,7 +803,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t max_sca OlapTuple start_base_key_range = _begin_scan_keys[i]; OlapTuple end_base_key_range = _end_scan_keys[i]; - const_iterator_type iter = fixed_value_set.begin(); + ConstIterator iter = fixed_value_set.begin(); for (; iter != fixed_value_set.end(); ++iter) { // alter the first ScanKey in original place diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 94d3aacf4a..f1d698ed7a 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -479,21 +479,17 @@ Status OlapScanNode::start_scan(RuntimeState* state) { return Status::OK(); } - VLOG_CRITICAL << "BuildOlapFilters"; + VLOG_CRITICAL << "BuildKeyRangesAndFilters"; // 3. Using ColumnValueRange to Build StorageEngine filters - RETURN_IF_ERROR(build_olap_filters()); - - VLOG_CRITICAL << "BuildScanKey"; - // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange` - RETURN_IF_ERROR(build_scan_key()); + RETURN_IF_ERROR(build_key_ranges_and_filters()); VLOG_CRITICAL << "Filter idle conjuncts"; - // 5. 
Filter idle conjunct which already trans to olap filters + // 4. Filter idle conjunct which already trans to olap filters // this must be after build_scan_key, it will free the StringValue memory remove_pushed_conjuncts(state); VLOG_CRITICAL << "StartScanThread"; - // 6. Start multi thread to read several `Sub Sub ScanRange` + // 5. Start multi thread to read several `Sub Sub ScanRange` RETURN_IF_ERROR(start_scan_thread(state)); return Status::OK(); @@ -683,22 +679,7 @@ static std::string olap_filters_to_string(const std::vector<doris::TCondition>& return filters_string; } -Status OlapScanNode::build_olap_filters() { - for (auto& iter : _column_value_ranges) { - std::vector<TCondition> filters; - std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); - - for (const auto& filter : filters) { - _olap_filter.push_back(std::move(filter)); - } - } - - _runtime_profile->add_info_string("PushdownPredicate", olap_filters_to_string(_olap_filter)); - - return Status::OK(); -} - -Status OlapScanNode::build_scan_key() { +Status OlapScanNode::build_key_ranges_and_filters() { const std::vector<std::string>& column_names = _olap_scan_node.key_column_name; const std::vector<TPrimitiveType::type>& column_types = _olap_scan_node.key_column_type; DCHECK(column_types.size() == column_names.size()); @@ -706,6 +687,9 @@ Status OlapScanNode::build_scan_key() { // 1. construct scan key except last olap engine short key _scan_keys.set_is_convertible(limit() == -1); + // we use `exact_range` to identify a key range is an exact range or not when we convert + // it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filter`. 
+ bool exact_range = true; for (int column_index = 0; column_index < column_names.size() && !_scan_keys.has_range_value(); ++column_index) { auto iter = _column_value_ranges.find(column_names[column_index]); @@ -714,9 +698,28 @@ Status OlapScanNode::build_scan_key() { } RETURN_IF_ERROR(std::visit( - [&](auto&& range) { return _scan_keys.extend_scan_key(range, _max_scan_key_num); }, + [&](auto&& range) { + RETURN_IF_ERROR( + _scan_keys.extend_scan_key(range, _max_scan_key_num, &exact_range)); + if (exact_range) { + _column_value_ranges.erase(iter->first); + } + return Status::OK(); + }, iter->second)); } + for (auto& iter : _column_value_ranges) { + std::vector<TCondition> filters; + std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); + + for (const auto& filter : filters) { + _olap_filter.push_back(std::move(filter)); + } + } + + _runtime_profile->add_info_string("PushdownPredicate", olap_filters_to_string(_olap_filter)); + + _runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string()); VLOG_CRITICAL << _scan_keys.debug_string(); diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index d3883fdfc7..88c17ba1e2 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -107,8 +107,7 @@ protected: void eval_const_conjuncts(); Status normalize_conjuncts(); - Status build_olap_filters(); - Status build_scan_key(); + Status build_key_ranges_and_filters(); Status start_scan_thread(RuntimeState* state); template <class T> diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index ba1a30c22e..1fdd398b22 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -714,22 +714,7 @@ static std::string olap_filters_to_string(const std::vector<doris::TCondition>& return filters_string; } -Status VOlapScanNode::build_olap_filters() { - for (auto& iter : _column_value_ranges) { - std::vector<TCondition> filters; - 
std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); - - for (const auto& filter : filters) { - _olap_filter.push_back(std::move(filter)); - } - } - - _runtime_profile->add_info_string("PushdownPredicate", olap_filters_to_string(_olap_filter)); - - return Status::OK(); -} - -Status VOlapScanNode::build_scan_key() { +Status VOlapScanNode::build_key_ranges_and_filters() { const std::vector<std::string>& column_names = _olap_scan_node.key_column_name; const std::vector<TPrimitiveType::type>& column_types = _olap_scan_node.key_column_type; DCHECK(column_types.size() == column_names.size()); @@ -737,6 +722,9 @@ Status VOlapScanNode::build_scan_key() { // 1. construct scan key except last olap engine short key _scan_keys.set_is_convertible(limit() == -1); + // we use `exact_range` to identify a key range is an exact range or not when we convert + // it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filter`. + bool exact_range = true; for (int column_index = 0; column_index < column_names.size() && !_scan_keys.has_range_value(); ++column_index) { auto iter = _column_value_ranges.find(column_names[column_index]); @@ -745,10 +733,30 @@ Status VOlapScanNode::build_scan_key() { } RETURN_IF_ERROR(std::visit( - [&](auto&& range) { return _scan_keys.extend_scan_key(range, _max_scan_key_num); }, + [&](auto&& range) { + RETURN_IF_ERROR( + _scan_keys.extend_scan_key(range, _max_scan_key_num, &exact_range)); + if (exact_range) { + _column_value_ranges.erase(iter->first); + } + return Status::OK(); + }, iter->second)); } + for (auto& iter : _column_value_ranges) { + std::vector<TCondition> filters; + std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); + + for (const auto& filter : filters) { + _olap_filter.push_back(std::move(filter)); + } + } + + _runtime_profile->add_info_string("PushdownPredicate", olap_filters_to_string(_olap_filter)); + + _runtime_profile->add_info_string("KeyRanges", 
_scan_keys.debug_string()); + VLOG_CRITICAL << _scan_keys.debug_string(); return Status::OK(); @@ -771,21 +779,17 @@ Status VOlapScanNode::start_scan(RuntimeState* state) { return Status::OK(); } - VLOG_CRITICAL << "BuildOlapFilters"; - // 3. Using ColumnValueRange to Build StorageEngine filters - RETURN_IF_ERROR(build_olap_filters()); - - VLOG_CRITICAL << "BuildScanKey"; - // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange` - RETURN_IF_ERROR(build_scan_key()); + VLOG_CRITICAL << "BuildKeyRangesAndFilters"; + // 3. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange` + RETURN_IF_ERROR(build_key_ranges_and_filters()); VLOG_CRITICAL << "Filter idle conjuncts"; - // 5. Filter idle conjunct which already trans to olap filters + // 4. Filter idle conjunct which already trans to olap filters // this must be after build_scan_key, it will free the StringValue memory remove_pushed_conjuncts(state); VLOG_CRITICAL << "StartScanThread"; - // 6. Start multi thread to read several `Sub Sub ScanRange` + // 5. 
Start multi thread to read several `Sub Sub ScanRange` RETURN_IF_ERROR(start_scan_thread(state)); return Status::OK(); diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h index f6ce2c5f25..6e8e4a0fde 100644 --- a/be/src/vec/exec/volap_scan_node.h +++ b/be/src/vec/exec/volap_scan_node.h @@ -65,8 +65,7 @@ private: Status start_scan(RuntimeState* state); void eval_const_conjuncts(); Status normalize_conjuncts(); - Status build_olap_filters(); - Status build_scan_key(); + Status build_key_ranges_and_filters(); template <class T> Status normalize_predicate(ColumnValueRange<T>& range, SlotDescriptor* slot); diff --git a/be/test/exec/olap_common_test.cpp b/be/test/exec/olap_common_test.cpp index e44ae17a0f..d444f2f7b3 100644 --- a/be/test/exec/olap_common_test.cpp +++ b/be/test/exec/olap_common_test.cpp @@ -443,7 +443,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) { EXPECT_TRUE(range1.add_fixed_value(i).ok()); } - scan_keys.extend_scan_key(range1, 1024); + bool exact_range = true; + scan_keys.extend_scan_key(range1, 1024, &exact_range); + EXPECT_EQ(exact_range, true); std::vector<std::unique_ptr<OlapScanRange>> key_range; scan_keys.get_key_range(&key_range); @@ -465,7 +467,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) { EXPECT_TRUE(range2.add_fixed_value(i).ok()); } - scan_keys.extend_scan_key(range2, 1024); + exact_range = true; + scan_keys.extend_scan_key(range2, 1024, &exact_range); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); @@ -492,7 +496,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) { range2.set_whole_value_range(); EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - scan_keys.extend_scan_key(range2, 1024); + exact_range = true; + scan_keys.extend_scan_key(range2, 1024, &exact_range); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); @@ -526,15 +532,18 @@ TEST_F(OlapScanKeysTest, ExtendFixedAndRangeTest) { EXPECT_TRUE(range1.add_fixed_value(i).ok()); } - 
scan_keys.extend_scan_key(range1, 1024); + bool exact_range = true; + scan_keys.extend_scan_key(range1, 1024, &exact_range); + EXPECT_EQ(exact_range, true); ColumnValueRange<int32_t> range2("col", TYPE_BIGINT); EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - scan_keys.extend_scan_key(range2, 1024); + exact_range = true; + scan_keys.extend_scan_key(range2, 1024, &exact_range); + EXPECT_EQ(exact_range, true); std::vector<std::unique_ptr<OlapScanRange>> key_range; - ; scan_keys.get_key_range(&key_range); @@ -551,7 +560,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedAndRangeTest) { EXPECT_TRUE(range2.add_range(FILTER_LESS, 100).ok()); - scan_keys.extend_scan_key(range2, 1024); + exact_range = true; + scan_keys.extend_scan_key(range2, 1024, &exact_range); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); @@ -575,10 +586,11 @@ TEST_F(OlapScanKeysTest, ExtendRangeTest) { EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); EXPECT_TRUE(range2.add_range(FILTER_LESS_OR_EQUAL, 100).ok()); - EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024).ok()); + bool exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, true); std::vector<std::unique_ptr<OlapScanRange>> key_range; - ; scan_keys.get_key_range(&key_range); @@ -589,7 +601,9 @@ TEST_F(OlapScanKeysTest, ExtendRangeTest) { EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024).ok()); + exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, false); scan_keys.get_key_range(&key_range); @@ -606,7 +620,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) { { OlapScanKeys scan_keys; ColumnValueRange<int8_t> range("col", TYPE_TINYINT); - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok()); + bool exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, 
true); scan_keys.get_key_range(&key_range); // contain null, [-128, 127] EXPECT_EQ(key_range.size(), 257); @@ -615,7 +631,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) { EXPECT_TRUE(range.add_range(FILTER_LESS, 50).ok()); scan_keys.clear(); - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok()); + exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); EXPECT_EQ(key_range.size(), 178); @@ -626,7 +644,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) { { OlapScanKeys scan_keys; ColumnValueRange<int16_t> range("col", TYPE_SMALLINT); - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok()); + bool exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); EXPECT_EQ(key_range.size(), 1); EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "null"); @@ -634,7 +654,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) { EXPECT_TRUE(range.add_range(FILTER_LARGER, 0).ok()); scan_keys.clear(); - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok()); + exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); EXPECT_EQ(key_range.size(), 1); @@ -643,7 +665,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) { EXPECT_TRUE(range.add_range(FILTER_LESS, 32766).ok()); scan_keys.clear(); - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok()); + exact_range = true; + EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok()); + EXPECT_EQ(exact_range, true); scan_keys.get_key_range(&key_range); EXPECT_EQ(key_range.size(), 1); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org