This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 139cd3d11a [Improvement] remove olap filters when use in key ranges (#10278)
139cd3d11a is described below

commit 139cd3d11acee071293c8badf14b432340e5aaac
Author: Gabriel <gabrielleeb...@gmail.com>
AuthorDate: Thu Jun 23 09:12:29 2022 +0800

    [Improvement] remove olap filters when use in key ranges (#10278)
---
 be/src/exec/olap_common.h           | 12 ++++----
 be/src/exec/olap_scan_node.cpp      | 53 ++++++++++++++++++-----------------
 be/src/exec/olap_scan_node.h        |  3 +-
 be/src/vec/exec/volap_scan_node.cpp | 56 ++++++++++++++++++++-----------------
 be/src/vec/exec/volap_scan_node.h   |  3 +-
 be/test/exec/olap_common_test.cpp   | 54 +++++++++++++++++++++++++----------
 6 files changed, 106 insertions(+), 75 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 9eb0e0a3b2..41d8f7aa5c 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -256,7 +256,7 @@ public:
               _is_convertible(true) {}
 
     template <class T>
-    Status extend_scan_key(ColumnValueRange<T>& range, int32_t 
max_scan_key_num);
+    Status extend_scan_key(ColumnValueRange<T>& range, int32_t 
max_scan_key_num, bool* exact_value);
 
     Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* 
key_range);
 
@@ -738,9 +738,10 @@ bool 
ColumnValueRange<T>::has_intersection(ColumnValueRange<T>& range) {
 }
 
 template <class T>
-Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t 
max_scan_key_num) {
+Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t 
max_scan_key_num,
+                                     bool* exact_value) {
     using namespace std;
-    typedef typename set<T>::const_iterator const_iterator_type;
+    using ConstIterator = typename set<T>::const_iterator;
 
     // 1. clear ScanKey if some column range is empty
     if (range.is_empty_value_range()) {
@@ -760,6 +761,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& 
range, int32_t max_sca
         if (range.get_fixed_value_size() > max_scan_key_num / scan_keys_size) {
             if (range.is_range_value_convertible()) {
                 range.convert_to_range_value();
+                *exact_value = false;
             } else {
                 return Status::OK();
             }
@@ -777,7 +779,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& 
range, int32_t max_sca
         // 3.1.1 construct num of fixed value ScanKey (begin_key == end_key)
         if (_begin_scan_keys.empty()) {
             const set<T>& fixed_value_set = range.get_fixed_value_set();
-            const_iterator_type iter = fixed_value_set.begin();
+            ConstIterator iter = fixed_value_set.begin();
 
             for (; iter != fixed_value_set.end(); ++iter) {
                 _begin_scan_keys.emplace_back();
@@ -801,7 +803,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& 
range, int32_t max_sca
                 OlapTuple start_base_key_range = _begin_scan_keys[i];
                 OlapTuple end_base_key_range = _end_scan_keys[i];
 
-                const_iterator_type iter = fixed_value_set.begin();
+                ConstIterator iter = fixed_value_set.begin();
 
                 for (; iter != fixed_value_set.end(); ++iter) {
                     // alter the first ScanKey in original place
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 94d3aacf4a..f1d698ed7a 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -479,21 +479,17 @@ Status OlapScanNode::start_scan(RuntimeState* state) {
         return Status::OK();
     }
 
-    VLOG_CRITICAL << "BuildOlapFilters";
+    VLOG_CRITICAL << "BuildKeyRangesAndFilters";
     // 3. Using ColumnValueRange to Build StorageEngine filters
-    RETURN_IF_ERROR(build_olap_filters());
-
-    VLOG_CRITICAL << "BuildScanKey";
-    // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several 
`Sub ScanRange`
-    RETURN_IF_ERROR(build_scan_key());
+    RETURN_IF_ERROR(build_key_ranges_and_filters());
 
     VLOG_CRITICAL << "Filter idle conjuncts";
-    // 5. Filter idle conjunct which already trans to olap filters
+    // 4. Filter idle conjunct which already trans to olap filters
     // this must be after build_scan_key, it will free the StringValue memory
     remove_pushed_conjuncts(state);
 
     VLOG_CRITICAL << "StartScanThread";
-    // 6. Start multi thread to read several `Sub Sub ScanRange`
+    // 5. Start multi thread to read several `Sub Sub ScanRange`
     RETURN_IF_ERROR(start_scan_thread(state));
 
     return Status::OK();
@@ -683,22 +679,7 @@ static std::string olap_filters_to_string(const 
std::vector<doris::TCondition>&
     return filters_string;
 }
 
-Status OlapScanNode::build_olap_filters() {
-    for (auto& iter : _column_value_ranges) {
-        std::vector<TCondition> filters;
-        std::visit([&](auto&& range) { range.to_olap_filter(filters); }, 
iter.second);
-
-        for (const auto& filter : filters) {
-            _olap_filter.push_back(std::move(filter));
-        }
-    }
-
-    _runtime_profile->add_info_string("PushdownPredicate", 
olap_filters_to_string(_olap_filter));
-
-    return Status::OK();
-}
-
-Status OlapScanNode::build_scan_key() {
+Status OlapScanNode::build_key_ranges_and_filters() {
     const std::vector<std::string>& column_names = 
_olap_scan_node.key_column_name;
     const std::vector<TPrimitiveType::type>& column_types = 
_olap_scan_node.key_column_type;
     DCHECK(column_types.size() == column_names.size());
@@ -706,6 +687,9 @@ Status OlapScanNode::build_scan_key() {
     // 1. construct scan key except last olap engine short key
     _scan_keys.set_is_convertible(limit() == -1);
 
+    // we use `exact_range` to identify a key range is an exact range or not 
when we convert
+    // it to `_scan_keys`. If `exact_range` is true, we can just discard it 
from `_olap_filter`.
+    bool exact_range = true;
     for (int column_index = 0; column_index < column_names.size() && 
!_scan_keys.has_range_value();
          ++column_index) {
         auto iter = _column_value_ranges.find(column_names[column_index]);
@@ -714,9 +698,28 @@ Status OlapScanNode::build_scan_key() {
         }
 
         RETURN_IF_ERROR(std::visit(
-                [&](auto&& range) { return _scan_keys.extend_scan_key(range, 
_max_scan_key_num); },
+                [&](auto&& range) {
+                    RETURN_IF_ERROR(
+                            _scan_keys.extend_scan_key(range, 
_max_scan_key_num, &exact_range));
+                    if (exact_range) {
+                        _column_value_ranges.erase(iter->first);
+                    }
+                    return Status::OK();
+                },
                 iter->second));
     }
+    for (auto& iter : _column_value_ranges) {
+        std::vector<TCondition> filters;
+        std::visit([&](auto&& range) { range.to_olap_filter(filters); }, 
iter.second);
+
+        for (const auto& filter : filters) {
+            _olap_filter.push_back(std::move(filter));
+        }
+    }
+
+    _runtime_profile->add_info_string("PushdownPredicate", 
olap_filters_to_string(_olap_filter));
+
+    _runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string());
 
     VLOG_CRITICAL << _scan_keys.debug_string();
 
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index d3883fdfc7..88c17ba1e2 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -107,8 +107,7 @@ protected:
 
     void eval_const_conjuncts();
     Status normalize_conjuncts();
-    Status build_olap_filters();
-    Status build_scan_key();
+    Status build_key_ranges_and_filters();
     Status start_scan_thread(RuntimeState* state);
 
     template <class T>
diff --git a/be/src/vec/exec/volap_scan_node.cpp 
b/be/src/vec/exec/volap_scan_node.cpp
index ba1a30c22e..1fdd398b22 100644
--- a/be/src/vec/exec/volap_scan_node.cpp
+++ b/be/src/vec/exec/volap_scan_node.cpp
@@ -714,22 +714,7 @@ static std::string olap_filters_to_string(const 
std::vector<doris::TCondition>&
     return filters_string;
 }
 
-Status VOlapScanNode::build_olap_filters() {
-    for (auto& iter : _column_value_ranges) {
-        std::vector<TCondition> filters;
-        std::visit([&](auto&& range) { range.to_olap_filter(filters); }, 
iter.second);
-
-        for (const auto& filter : filters) {
-            _olap_filter.push_back(std::move(filter));
-        }
-    }
-
-    _runtime_profile->add_info_string("PushdownPredicate", 
olap_filters_to_string(_olap_filter));
-
-    return Status::OK();
-}
-
-Status VOlapScanNode::build_scan_key() {
+Status VOlapScanNode::build_key_ranges_and_filters() {
     const std::vector<std::string>& column_names = 
_olap_scan_node.key_column_name;
     const std::vector<TPrimitiveType::type>& column_types = 
_olap_scan_node.key_column_type;
     DCHECK(column_types.size() == column_names.size());
@@ -737,6 +722,9 @@ Status VOlapScanNode::build_scan_key() {
     // 1. construct scan key except last olap engine short key
     _scan_keys.set_is_convertible(limit() == -1);
 
+    // we use `exact_range` to identify a key range is an exact range or not 
when we convert
+    // it to `_scan_keys`. If `exact_range` is true, we can just discard it 
from `_olap_filter`.
+    bool exact_range = true;
     for (int column_index = 0; column_index < column_names.size() && 
!_scan_keys.has_range_value();
          ++column_index) {
         auto iter = _column_value_ranges.find(column_names[column_index]);
@@ -745,10 +733,30 @@ Status VOlapScanNode::build_scan_key() {
         }
 
         RETURN_IF_ERROR(std::visit(
-                [&](auto&& range) { return _scan_keys.extend_scan_key(range, 
_max_scan_key_num); },
+                [&](auto&& range) {
+                    RETURN_IF_ERROR(
+                            _scan_keys.extend_scan_key(range, 
_max_scan_key_num, &exact_range));
+                    if (exact_range) {
+                        _column_value_ranges.erase(iter->first);
+                    }
+                    return Status::OK();
+                },
                 iter->second));
     }
 
+    for (auto& iter : _column_value_ranges) {
+        std::vector<TCondition> filters;
+        std::visit([&](auto&& range) { range.to_olap_filter(filters); }, 
iter.second);
+
+        for (const auto& filter : filters) {
+            _olap_filter.push_back(std::move(filter));
+        }
+    }
+
+    _runtime_profile->add_info_string("PushdownPredicate", 
olap_filters_to_string(_olap_filter));
+
+    _runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string());
+
     VLOG_CRITICAL << _scan_keys.debug_string();
 
     return Status::OK();
@@ -771,21 +779,17 @@ Status VOlapScanNode::start_scan(RuntimeState* state) {
         return Status::OK();
     }
 
-    VLOG_CRITICAL << "BuildOlapFilters";
-    // 3. Using ColumnValueRange to Build StorageEngine filters
-    RETURN_IF_ERROR(build_olap_filters());
-
-    VLOG_CRITICAL << "BuildScanKey";
-    // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several 
`Sub ScanRange`
-    RETURN_IF_ERROR(build_scan_key());
+    VLOG_CRITICAL << "BuildKeyRangesAndFilters";
+    // 3. Using `Key Column`'s ColumnValueRange to split ScanRange to several 
`Sub ScanRange`
+    RETURN_IF_ERROR(build_key_ranges_and_filters());
 
     VLOG_CRITICAL << "Filter idle conjuncts";
-    // 5. Filter idle conjunct which already trans to olap filters
+    // 4. Filter idle conjunct which already trans to olap filters
     // this must be after build_scan_key, it will free the StringValue memory
     remove_pushed_conjuncts(state);
 
     VLOG_CRITICAL << "StartScanThread";
-    // 6. Start multi thread to read several `Sub Sub ScanRange`
+    // 5. Start multi thread to read several `Sub Sub ScanRange`
     RETURN_IF_ERROR(start_scan_thread(state));
 
     return Status::OK();
diff --git a/be/src/vec/exec/volap_scan_node.h 
b/be/src/vec/exec/volap_scan_node.h
index f6ce2c5f25..6e8e4a0fde 100644
--- a/be/src/vec/exec/volap_scan_node.h
+++ b/be/src/vec/exec/volap_scan_node.h
@@ -65,8 +65,7 @@ private:
     Status start_scan(RuntimeState* state);
     void eval_const_conjuncts();
     Status normalize_conjuncts();
-    Status build_olap_filters();
-    Status build_scan_key();
+    Status build_key_ranges_and_filters();
     template <class T>
     Status normalize_predicate(ColumnValueRange<T>& range, SlotDescriptor* 
slot);
 
diff --git a/be/test/exec/olap_common_test.cpp 
b/be/test/exec/olap_common_test.cpp
index e44ae17a0f..d444f2f7b3 100644
--- a/be/test/exec/olap_common_test.cpp
+++ b/be/test/exec/olap_common_test.cpp
@@ -443,7 +443,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) {
         EXPECT_TRUE(range1.add_fixed_value(i).ok());
     }
 
-    scan_keys.extend_scan_key(range1, 1024);
+    bool exact_range = true;
+    scan_keys.extend_scan_key(range1, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     std::vector<std::unique_ptr<OlapScanRange>> key_range;
     scan_keys.get_key_range(&key_range);
@@ -465,7 +467,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) {
         EXPECT_TRUE(range2.add_fixed_value(i).ok());
     }
 
-    scan_keys.extend_scan_key(range2, 1024);
+    exact_range = true;
+    scan_keys.extend_scan_key(range2, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     scan_keys.get_key_range(&key_range);
 
@@ -492,7 +496,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedTest) {
     range2.set_whole_value_range();
     EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok());
 
-    scan_keys.extend_scan_key(range2, 1024);
+    exact_range = true;
+    scan_keys.extend_scan_key(range2, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     scan_keys.get_key_range(&key_range);
 
@@ -526,15 +532,18 @@ TEST_F(OlapScanKeysTest, ExtendFixedAndRangeTest) {
         EXPECT_TRUE(range1.add_fixed_value(i).ok());
     }
 
-    scan_keys.extend_scan_key(range1, 1024);
+    bool exact_range = true;
+    scan_keys.extend_scan_key(range1, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     ColumnValueRange<int32_t> range2("col", TYPE_BIGINT);
     EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok());
 
-    scan_keys.extend_scan_key(range2, 1024);
+    exact_range = true;
+    scan_keys.extend_scan_key(range2, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     std::vector<std::unique_ptr<OlapScanRange>> key_range;
-    ;
 
     scan_keys.get_key_range(&key_range);
 
@@ -551,7 +560,9 @@ TEST_F(OlapScanKeysTest, ExtendFixedAndRangeTest) {
 
     EXPECT_TRUE(range2.add_range(FILTER_LESS, 100).ok());
 
-    scan_keys.extend_scan_key(range2, 1024);
+    exact_range = true;
+    scan_keys.extend_scan_key(range2, 1024, &exact_range);
+    EXPECT_EQ(exact_range, true);
 
     scan_keys.get_key_range(&key_range);
 
@@ -575,10 +586,11 @@ TEST_F(OlapScanKeysTest, ExtendRangeTest) {
     EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok());
     EXPECT_TRUE(range2.add_range(FILTER_LESS_OR_EQUAL, 100).ok());
 
-    EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024).ok());
+    bool exact_range = true;
+    EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range).ok());
+    EXPECT_EQ(exact_range, true);
 
     std::vector<std::unique_ptr<OlapScanRange>> key_range;
-    ;
 
     scan_keys.get_key_range(&key_range);
 
@@ -589,7 +601,9 @@ TEST_F(OlapScanKeysTest, ExtendRangeTest) {
 
     EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok());
 
-    EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024).ok());
+    exact_range = true;
+    EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range).ok());
+    EXPECT_EQ(exact_range, false);
 
     scan_keys.get_key_range(&key_range);
 
@@ -606,7 +620,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
     {
         OlapScanKeys scan_keys;
         ColumnValueRange<int8_t> range("col", TYPE_TINYINT);
-        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok());
+        bool exact_range = true;
+        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok());
+        EXPECT_EQ(exact_range, true);
         scan_keys.get_key_range(&key_range);
         // contain null, [-128, 127]
         EXPECT_EQ(key_range.size(), 257);
@@ -615,7 +631,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
 
         EXPECT_TRUE(range.add_range(FILTER_LESS, 50).ok());
         scan_keys.clear();
-        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok());
+        exact_range = true;
+        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok());
+        EXPECT_EQ(exact_range, true);
         scan_keys.get_key_range(&key_range);
 
         EXPECT_EQ(key_range.size(), 178);
@@ -626,7 +644,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
     {
         OlapScanKeys scan_keys;
         ColumnValueRange<int16_t> range("col", TYPE_SMALLINT);
-        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok());
+        bool exact_range = true;
+        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok());
+        EXPECT_EQ(exact_range, true);
         scan_keys.get_key_range(&key_range);
         EXPECT_EQ(key_range.size(), 1);
         EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), 
"null");
@@ -634,7 +654,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
 
         EXPECT_TRUE(range.add_range(FILTER_LARGER, 0).ok());
         scan_keys.clear();
-        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok());
+        exact_range = true;
+        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok());
+        EXPECT_EQ(exact_range, true);
         scan_keys.get_key_range(&key_range);
 
         EXPECT_EQ(key_range.size(), 1);
@@ -643,7 +665,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
 
         EXPECT_TRUE(range.add_range(FILTER_LESS, 32766).ok());
         scan_keys.clear();
-        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024).ok());
+        exact_range = true;
+        EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range).ok());
+        EXPECT_EQ(exact_range, true);
         scan_keys.get_key_range(&key_range);
 
         EXPECT_EQ(key_range.size(), 1);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to