[GitHub] [doris] xinyiZzz commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

via GitHub Thu, 23 Feb 2023 23:40:57 -0800


xinyiZzz commented on code in PR #15917:
URL: https://github.com/apache/doris/pull/15917#discussion_r1116608487



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1160,58 +1189,86 @@ void SegmentIterator::_vec_init_lazy_materialization() {
         _is_need_short_eval = true;
     }
 
-    // Step 2: check non-predicate read costs to determine whether need lazy 
materialization
-    // fill _non_predicate_columns.
-    // After some optimization, we suppose lazy materialization is better 
performance.
+    // make _schema_block_id_map
+    _schema_block_id_map.resize(_schema.columns().size());
+    for (int i = 0; i < _schema.num_column_ids(); i++) {
+        auto cid = _schema.column_id(i);
+        _schema_block_id_map[cid] = i;
+    }
+
+    // Step2: extract columns that can execute expr context
+    _is_remaining_expr_column.resize(_schema.columns().size(), false);
+    if (_enable_remaining_expr_pushdown && _remaining_vconjunct_root != 
nullptr) {
+        _extract_remaining_expr_columns(_remaining_vconjunct_root);
+        if (!_remaining_expr_columns.empty()) {
+            _is_need_expr_eval = true;
+            for (auto cid : _schema.column_ids()) {
+                // pred column also needs to be filtered by expr
+                if (_is_remaining_expr_column[cid] || _is_pred_column[cid]) {
+                    auto loc = _schema_block_id_map[cid];
+                    _columns_to_filter.push_back(loc);
+                }
+            }
+        }
+    }
+
+    // Step 3: fill non predicate columns and second read columns
     if (_schema.column_ids().size() > pred_column_ids.size()) {
         for (auto cid : _schema.column_ids()) {
             if (!_is_pred_column[cid]) {
-                _non_predicate_columns.push_back(cid);
+                if (!_is_remaining_expr_column[cid]) {
+                    _non_predicate_columns.push_back(cid);
+                } else {
+                    _second_read_column_ids.push_back(cid);
+                }
                 if (_is_need_vec_eval || _is_need_short_eval) {
                     _lazy_materialization_read = true;
                 }
             }
         }
     }
 
-    // Step 3: fill column ids for read and output
+    // Step 4: fill first read columns
     if (_lazy_materialization_read) {
         // insert pred cid to first_read_columns
         for (auto cid : pred_column_ids) {
             _first_read_column_ids.push_back(cid);
         }
-    } else if (!_is_need_vec_eval &&
-               !_is_need_short_eval) { // no pred exists, just read and output 
column
+    } else if (!_is_need_vec_eval && !_is_need_short_eval &&
+               !_is_need_expr_eval) { // no pred exists, just read and output 
column
         for (int i = 0; i < _schema.num_column_ids(); i++) {
             auto cid = _schema.column_id(i);
             _first_read_column_ids.push_back(cid);
         }
-    } else { // pred exits, but we can eliminate lazy materialization
-        // insert pred/non-pred cid to first read columns
-        std::set<ColumnId> pred_id_set;
-        pred_id_set.insert(_short_cir_pred_column_ids.begin(), 
_short_cir_pred_column_ids.end());
-        pred_id_set.insert(_vec_pred_column_ids.begin(), 
_vec_pred_column_ids.end());
-        std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
-                                        _non_predicate_columns.end());
-
-        for (int i = 0; i < _schema.num_column_ids(); i++) {
-            auto cid = _schema.column_id(i);
-            if (pred_id_set.find(cid) != pred_id_set.end()) {
-                _first_read_column_ids.push_back(cid);
-            } else if (non_pred_set.find(cid) != non_pred_set.end()) {
+    } else {
+        if (_is_need_vec_eval || _is_need_short_eval) {
+            // TODO To refactor, because we suppose lazy materialization is 
better performance.
+            // pred exits, but we can eliminate lazy materialization
+            // insert pred/non-pred cid to first read columns
+            std::set<ColumnId> pred_id_set;
+            pred_id_set.insert(_short_cir_pred_column_ids.begin(),
+                               _short_cir_pred_column_ids.end());
+            pred_id_set.insert(_vec_pred_column_ids.begin(), 
_vec_pred_column_ids.end());
+            std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
+                                            _non_predicate_columns.end());
+
+            for (int i = 0; i < _schema.num_column_ids(); i++) {
+                auto cid = _schema.column_id(i);
+                if (pred_id_set.find(cid) != pred_id_set.end()) {
+                    _first_read_column_ids.push_back(cid);
+                } else if (non_pred_set.find(cid) != non_pred_set.end()) {
+                    _first_read_column_ids.push_back(cid);
+                    // when _lazy_materialization_read = false, non-predicate 
column should also be filtered by sel idx, so we regard it as pred columns
+                    _is_pred_column[cid] = true;
+                }
+            }
+        } else if (_is_need_expr_eval) {
+            DCHECK(!_is_need_vec_eval && !_is_need_short_eval);
+            for (auto cid : _remaining_expr_columns) {
                 _first_read_column_ids.push_back(cid);
-                // when _lazy_materialization_read = false, non-predicate 
column should also be filtered by sel idx, so we regard it as pred columns
-                _is_pred_column[cid] = true;
             }

Review Comment:
   In this case `_second_read_column_ids` must be empty.
   Otherwise `_lazy_materialization_read` would not be false, and execution
   would have entered the first `if` branch instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [doris] xinyiZzz commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

Reply via email to