[GitHub] [doris] github-actions[bot] commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

via GitHub Wed, 08 Mar 2023 11:38:31 -0800


github-actions[bot] commented on code in PR #15917:
URL: https://github.com/apache/doris/pull/15917#discussion_r1129940414



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1583,31 +1651,117 @@ Status SegmentIterator::next_batch(vectorized::Block* 
block) {
         for (int i = 0; i < block->columns(); i++) {
             auto cid = _schema.column_id(i);
             // todo(wb) abstract make column where
-            if (!_is_pred_column[cid]) { // non-predicate
+            if (!_is_pred_column[cid]) {
                 block->replace_by_position(i, 
std::move(_current_return_columns[cid]));
             }
         }
         block->clear_column_data();
         return Status::EndOfFile("no more data in segment");
     }
 
-    if (!_is_need_vec_eval && !_is_need_short_eval) {
-        _replace_version_col(_current_batch_rows_read);
+    if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
         _output_non_pred_columns(block);
         _output_index_result_column(nullptr, 0, block);
     } else {
-        _convert_dict_code_for_predicate_if_necessary();
         uint16_t selected_size = _current_batch_rows_read;
         uint16_t sel_rowid_idx[selected_size];
 
-        // step 1: evaluate vectorization predicate
-        selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, 
selected_size);
+        if (_is_need_vec_eval || _is_need_short_eval) {
+            _convert_dict_code_for_predicate_if_necessary();
+
+            // step 1: evaluate vectorization predicate
+            selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, 
selected_size);
+
+            // step 2: evaluate short circuit predicate
+            // todo(wb) research whether need to read short predicate after 
vectorization evaluation
+            //          to reduce cost of read short circuit columns.
+            //          In SSB test, it make no difference; So need more 
scenarios to test
+            selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, 
selected_size);
 
-        // step 2: evaluate short circuit predicate
-        // todo(wb) research whether need to read short predicate after 
vectorization evaluation
-        //          to reduce cost of read short circuit columns.
-        //          In SSB test, it make no difference; So need more scenarios 
to test
-        selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, 
selected_size);
+            if (selected_size > 0) {
+                // step 3.1: output short circuit and predicate column
+                // when lazy materialization enables, _first_read_column_ids = 
distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+                // see _vec_init_lazy_materialization
+                // todo(wb) need to tell input columnids from output columnids
+                RETURN_IF_ERROR(_output_column_by_sel_idx(block, 
_first_read_column_ids,
+                                                          sel_rowid_idx, 
selected_size));
+
+                // step 3.2: read remaining expr column and evaluate it.
+                if (_is_need_expr_eval) {
+                    // The predicate column contains the remaining expr 
column, no need second read.
+                    if (!_second_read_column_ids.empty()) {
+                        SCOPED_RAW_TIMER(&_opts.stats->second_read_ns);
+                        RETURN_IF_ERROR(_read_columns_by_rowids(
+                                _second_read_column_ids, _block_rowids, 
sel_rowid_idx,
+                                selected_size, &_current_return_columns));
+                        if (std::find(_second_read_column_ids.begin(),
+                                      _second_read_column_ids.end(),
+                                      _schema.version_col_idx()) != 
_second_read_column_ids.end()) {
+                            _replace_version_col(selected_size);
+                        }
+                        for (auto cid : _second_read_column_ids) {
+                            auto loc = _schema_block_id_map[cid];
+                            block->replace_by_position(loc,
+                                                       
std::move(_current_return_columns[cid]));
+                        }
+                    }
+
+                    DCHECK(block->columns() > 
_schema_block_id_map[*_common_expr_columns.begin()]);
+                    // block->rows() takes the size of the first column by 
default. If the first column is no predicate column,
+                    // it has not been read yet. add a const column that has 
been read to calculate rows().
+                    if (block->rows() == 0) {
+                        vectorized::MutableColumnPtr col0 =
+                                
std::move(*block->get_by_position(0).column).mutate();

Review Comment:
   warning: std::move of the const expression has no effect; remove std::move() 
[performance-move-const-arg]
   
   ```suggestion
                                   *block->get_by_position(0).column.mutate();
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1583,31 +1651,117 @@
         for (int i = 0; i < block->columns(); i++) {
             auto cid = _schema.column_id(i);
             // todo(wb) abstract make column where
-            if (!_is_pred_column[cid]) { // non-predicate
+            if (!_is_pred_column[cid]) {
                 block->replace_by_position(i, 
std::move(_current_return_columns[cid]));
             }
         }
         block->clear_column_data();
         return Status::EndOfFile("no more data in segment");
     }
 
-    if (!_is_need_vec_eval && !_is_need_short_eval) {
-        _replace_version_col(_current_batch_rows_read);
+    if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
         _output_non_pred_columns(block);
         _output_index_result_column(nullptr, 0, block);
     } else {
-        _convert_dict_code_for_predicate_if_necessary();
         uint16_t selected_size = _current_batch_rows_read;
         uint16_t sel_rowid_idx[selected_size];
 
-        // step 1: evaluate vectorization predicate
-        selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, 
selected_size);
+        if (_is_need_vec_eval || _is_need_short_eval) {
+            _convert_dict_code_for_predicate_if_necessary();
+
+            // step 1: evaluate vectorization predicate
+            selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, 
selected_size);
+
+            // step 2: evaluate short circuit predicate
+            // todo(wb) research whether need to read short predicate after 
vectorization evaluation
+            //          to reduce cost of read short circuit columns.
+            //          In SSB test, it make no difference; So need more 
scenarios to test
+            selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, 
selected_size);
 
-        // step 2: evaluate short circuit predicate
-        // todo(wb) research whether need to read short predicate after 
vectorization evaluation
-        //          to reduce cost of read short circuit columns.
-        //          In SSB test, it make no difference; So need more scenarios 
to test
-        selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, 
selected_size);
+            if (selected_size > 0) {
+                // step 3.1: output short circuit and predicate column
+                // when lazy materialization enables, _first_read_column_ids = 
distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+                // see _vec_init_lazy_materialization
+                // todo(wb) need to tell input columnids from output columnids
+                RETURN_IF_ERROR(_output_column_by_sel_idx(block, 
_first_read_column_ids,
+                                                          sel_rowid_idx, 
selected_size));
+
+                // step 3.2: read remaining expr column and evaluate it.
+                if (_is_need_expr_eval) {
+                    // The predicate column contains the remaining expr 
column, no need second read.
+                    if (!_second_read_column_ids.empty()) {
+                        SCOPED_RAW_TIMER(&_opts.stats->second_read_ns);
+                        RETURN_IF_ERROR(_read_columns_by_rowids(
+                                _second_read_column_ids, _block_rowids, 
sel_rowid_idx,
+                                selected_size, &_current_return_columns));
+                        if (std::find(_second_read_column_ids.begin(),
+                                      _second_read_column_ids.end(),
+                                      _schema.version_col_idx()) != 
_second_read_column_ids.end()) {
+                            _replace_version_col(selected_size);
+                        }
+                        for (auto cid : _second_read_column_ids) {
+                            auto loc = _schema_block_id_map[cid];
+                            block->replace_by_position(loc,
+                                                       
std::move(_current_return_columns[cid]));
+                        }
+                    }
+
+                    DCHECK(block->columns() > 
_schema_block_id_map[*_common_expr_columns.begin()]);
+                    // block->rows() takes the size of the first column by 
default. If the first column is no predicate column,
+                    // it has not been read yet. add a const column that has 
been read to calculate rows().
+                    if (block->rows() == 0) {
+                        vectorized::MutableColumnPtr col0 =
+                                
std::move(*block->get_by_position(0).column).mutate();
+                        auto res_column = vectorized::ColumnString::create();
+                        res_column->insert_data("", 0);
+                        auto col_const = 
vectorized::ColumnConst::create(std::move(res_column),
+                                                                         
selected_size);
+                        block->replace_by_position(0, std::move(col_const));
+                        _output_index_result_column(sel_rowid_idx, 
selected_size, block);
+                        
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
+                        RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
+                        block->replace_by_position(0, std::move(col0));
+                    } else {
+                        _output_index_result_column(sel_rowid_idx, 
selected_size, block);
+                        
block->shrink_char_type_column_suffix_zero(_char_type_idx);
+                        RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, 
selected_size, block));
+                    }
+                }
+            } else if (_is_need_expr_eval) {
+                for (auto cid : _second_read_column_ids) {
+                    auto loc = _schema_block_id_map[cid];
+                    block->replace_by_position(loc, 
std::move(_current_return_columns[cid]));
+                }
+            }
+        } else if (_is_need_expr_eval) {
+            DCHECK(!_first_read_column_ids.empty());
+            // first read all rows are insert block, initialize sel_rowid_idx 
to all rows.
+            for (auto cid : _first_read_column_ids) {
+                auto loc = _schema_block_id_map[cid];
+                block->replace_by_position(loc, 
std::move(_current_return_columns[cid]));
+            }
+            for (uint32_t i = 0; i < selected_size; ++i) {
+                sel_rowid_idx[i] = i;
+            }
+
+            if (block->rows() == 0) {
+                vectorized::MutableColumnPtr col0 =
+                        std::move(*block->get_by_position(0).column).mutate();

Review Comment:
   warning: std::move of the const expression has no effect; remove std::move() 
[performance-move-const-arg]
   
   ```suggestion
                           *block->get_by_position(0).column.mutate();
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [doris] github-actions[bot] commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

Reply via email to