github-actions[bot] commented on code in PR #15917: URL: https://github.com/apache/doris/pull/15917#discussion_r1129940414
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1583,31 +1651,117 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { for (int i = 0; i < block->columns(); i++) { auto cid = _schema.column_id(i); // todo(wb) abstract make column where - if (!_is_pred_column[cid]) { // non-predicate + if (!_is_pred_column[cid]) { block->replace_by_position(i, std::move(_current_return_columns[cid])); } } block->clear_column_data(); return Status::EndOfFile("no more data in segment"); } - if (!_is_need_vec_eval && !_is_need_short_eval) { - _replace_version_col(_current_batch_rows_read); + if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) { _output_non_pred_columns(block); _output_index_result_column(nullptr, 0, block); } else { - _convert_dict_code_for_predicate_if_necessary(); uint16_t selected_size = _current_batch_rows_read; uint16_t sel_rowid_idx[selected_size]; - // step 1: evaluate vectorization predicate - selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); + if (_is_need_vec_eval || _is_need_short_eval) { + _convert_dict_code_for_predicate_if_necessary(); + + // step 1: evaluate vectorization predicate + selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); + + // step 2: evaluate short circuit predicate + // todo(wb) research whether need to read short predicate after vectorization evaluation + // to reduce cost of read short circuit columns. + // In SSB test, it make no difference; So need more scenarios to test + selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, selected_size); - // step 2: evaluate short circuit predicate - // todo(wb) research whether need to read short predicate after vectorization evaluation - // to reduce cost of read short circuit columns. - // In SSB test, it make no difference; So need more scenarios to test - selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, selected_size); + if (selected_size > 0) { + // step 3.1: output short circuit and predicate column + // when lazy materialization enables, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids) + // see _vec_init_lazy_materialization + // todo(wb) need to tell input columnids from output columnids + RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, + sel_rowid_idx, selected_size)); + + // step 3.2: read remaining expr column and evaluate it. + if (_is_need_expr_eval) { + // The predicate column contains the remaining expr column, no need second read. + if (!_second_read_column_ids.empty()) { + SCOPED_RAW_TIMER(&_opts.stats->second_read_ns); + RETURN_IF_ERROR(_read_columns_by_rowids( + _second_read_column_ids, _block_rowids, sel_rowid_idx, + selected_size, &_current_return_columns)); + if (std::find(_second_read_column_ids.begin(), + _second_read_column_ids.end(), + _schema.version_col_idx()) != _second_read_column_ids.end()) { + _replace_version_col(selected_size); + } + for (auto cid : _second_read_column_ids) { + auto loc = _schema_block_id_map[cid]; + block->replace_by_position(loc, + std::move(_current_return_columns[cid])); + } + } + + DCHECK(block->columns() > _schema_block_id_map[*_common_expr_columns.begin()]); + // block->rows() takes the size of the first column by default. If the first column is no predicate column, + // it has not been read yet. add a const column that has been read to calculate rows(). + if (block->rows() == 0) { + vectorized::MutableColumnPtr col0 = + std::move(*block->get_by_position(0).column).mutate(); Review Comment: warning: std::move of the const expression has no effect; remove std::move() [performance-move-const-arg] ```suggestion *block->get_by_position(0).column.mutate(); ``` ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1583,31 +1651,117 @@ for (int i = 0; i < block->columns(); i++) { auto cid = _schema.column_id(i); // todo(wb) abstract make column where - if (!_is_pred_column[cid]) { // non-predicate + if (!_is_pred_column[cid]) { block->replace_by_position(i, std::move(_current_return_columns[cid])); } } block->clear_column_data(); return Status::EndOfFile("no more data in segment"); } - if (!_is_need_vec_eval && !_is_need_short_eval) { - _replace_version_col(_current_batch_rows_read); + if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) { _output_non_pred_columns(block); _output_index_result_column(nullptr, 0, block); } else { - _convert_dict_code_for_predicate_if_necessary(); uint16_t selected_size = _current_batch_rows_read; uint16_t sel_rowid_idx[selected_size]; - // step 1: evaluate vectorization predicate - selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); + if (_is_need_vec_eval || _is_need_short_eval) { + _convert_dict_code_for_predicate_if_necessary(); + + // step 1: evaluate vectorization predicate + selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); + + // step 2: evaluate short circuit predicate + // todo(wb) research whether need to read short predicate after vectorization evaluation + // to reduce cost of read short circuit columns. + // In SSB test, it make no difference; So need more scenarios to test + selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, selected_size); - // step 2: evaluate short circuit predicate - // todo(wb) research whether need to read short predicate after vectorization evaluation - // to reduce cost of read short circuit columns. - // In SSB test, it make no difference; So need more scenarios to test - selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, selected_size); + if (selected_size > 0) { + // step 3.1: output short circuit and predicate column + // when lazy materialization enables, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids) + // see _vec_init_lazy_materialization + // todo(wb) need to tell input columnids from output columnids + RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, + sel_rowid_idx, selected_size)); + + // step 3.2: read remaining expr column and evaluate it. + if (_is_need_expr_eval) { + // The predicate column contains the remaining expr column, no need second read. + if (!_second_read_column_ids.empty()) { + SCOPED_RAW_TIMER(&_opts.stats->second_read_ns); + RETURN_IF_ERROR(_read_columns_by_rowids( + _second_read_column_ids, _block_rowids, sel_rowid_idx, + selected_size, &_current_return_columns)); + if (std::find(_second_read_column_ids.begin(), + _second_read_column_ids.end(), + _schema.version_col_idx()) != _second_read_column_ids.end()) { + _replace_version_col(selected_size); + } + for (auto cid : _second_read_column_ids) { + auto loc = _schema_block_id_map[cid]; + block->replace_by_position(loc, + std::move(_current_return_columns[cid])); + } + } + + DCHECK(block->columns() > _schema_block_id_map[*_common_expr_columns.begin()]); + // block->rows() takes the size of the first column by default. If the first column is no predicate column, + // it has not been read yet. add a const column that has been read to calculate rows(). + if (block->rows() == 0) { + vectorized::MutableColumnPtr col0 = + std::move(*block->get_by_position(0).column).mutate(); + auto res_column = vectorized::ColumnString::create(); + res_column->insert_data("", 0); + auto col_const = vectorized::ColumnConst::create(std::move(res_column), + selected_size); + block->replace_by_position(0, std::move(col_const)); + _output_index_result_column(sel_rowid_idx, selected_size, block); + block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0); + RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block)); + block->replace_by_position(0, std::move(col0)); + } else { + _output_index_result_column(sel_rowid_idx, selected_size, block); + block->shrink_char_type_column_suffix_zero(_char_type_idx); + RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block)); + } + } + } else if (_is_need_expr_eval) { + for (auto cid : _second_read_column_ids) { + auto loc = _schema_block_id_map[cid]; + block->replace_by_position(loc, std::move(_current_return_columns[cid])); + } + } + } else if (_is_need_expr_eval) { + DCHECK(!_first_read_column_ids.empty()); + // first read all rows are insert block, initialize sel_rowid_idx to all rows. + for (auto cid : _first_read_column_ids) { + auto loc = _schema_block_id_map[cid]; + block->replace_by_position(loc, std::move(_current_return_columns[cid])); + } + for (uint32_t i = 0; i < selected_size; ++i) { + sel_rowid_idx[i] = i; + } + + if (block->rows() == 0) { + vectorized::MutableColumnPtr col0 = + std::move(*block->get_by_position(0).column).mutate(); Review Comment: warning: std::move of the const expression has no effect; remove std::move() [performance-move-const-arg] ```suggestion *block->get_by_position(0).column.mutate(); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org