This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 4b256117dd3 [fix](expr) Re fix BE core dump while common expr filter delete condition column #29328 (#29329) 4b256117dd3 is described below commit 4b256117dd34450bf736924f707c55aeac8a262b Author: Xinyi Zou <zouxiny...@gmail.com> AuthorDate: Fri Dec 29 23:24:46 2023 +0800 [fix](expr) Re fix BE core dump while common expr filter delete condition column #29328 (#29329) --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 52 +++++++++------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d41b18609a4..0a786d7a545 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1454,8 +1454,6 @@ Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, r // todo(wb) need a UT here Status SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column.resize(_schema->columns().size(), false); - std::vector<bool> is_pred_column_no_del_condition; - is_pred_column_no_del_condition.resize(_schema->columns().size(), false); // including short/vec/delete pred std::set<ColumnId> pred_column_ids; @@ -1497,7 +1495,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() { for (auto predicate : _col_predicates) { auto cid = predicate->column_id(); _is_pred_column[cid] = true; - is_pred_column_no_del_condition[cid] = true; pred_column_ids.insert(cid); // check pred using short eval or vec eval @@ -1551,16 +1548,10 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema->column_ids()) { - // pred column also needs to be filtered by expr, exclude delete condition column, - // Delete condition column not need to be filtered, query engine does not need it, - // after _output_column_by_sel_idx, delete condition materialize column will be erase - // at the end of the block. - // Eg: - // `delete from table where a = 10;` - // `select b from table;` - // a column only effective in segment iterator, the block from query engine only contain the b column, - // so no need to filter a column by expr. - if (_is_common_expr_column[cid] || is_pred_column_no_del_condition[cid]) { + // pred column also needs to be filtered by expr, exclude additional delete condition column. + // if delete condition column not in the block, no filter is needed + // and will be removed from _columns_to_filter in the first next_batch. + if (_is_common_expr_column[cid] || _is_pred_column[cid]) { auto loc = _schema_block_id_map[cid]; _columns_to_filter.push_back(loc); } @@ -2037,6 +2028,22 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _current_return_columns[cid]->reserve(_opts.block_row_max); } } + + // Additional deleted filter condition will be materialized column be at the end of the block, + // after _output_column_by_sel_idx will be erase, we not need to filter it, + // so erase it from _columns_to_filter in the first next_batch. + // Eg: + // `delete from table where a = 10;` + // `select b from table;` + // a column only effective in segment iterator, the block from query engine only contain the b column, + // so no need to filter a column by expr. + for (auto it = _columns_to_filter.begin(); it != _columns_to_filter.end();) { + if (*it >= block->columns()) { + it = _columns_to_filter.erase(it); + } else { + ++it; + } + } } _init_current_block(block, _current_return_columns); @@ -2051,25 +2058,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _replace_version_col(_current_batch_rows_read); } - // If col >= block->columns(), it means col should not be filtered, there is a BUG. - // such as delete condition column was incorrectly put into columns_to_filter, - // which is usually at the end of the block. only check during the first next_batch. - if (_opts.stats->blocks_load == 0) { - for (const auto& col : _columns_to_filter) { - if (col >= block->columns()) { - std::ostringstream ss; - for (const auto& i : _columns_to_filter) { - ss << i << "-"; - } - throw Exception( - ErrorCode::INTERNAL_ERROR, - "filter block column id(index) greater than block->columns(), " - "column id={}, all columns that need filter={}, block columns num={}", - col, ss.str().substr(0, ss.str().length() - 1), block->columns()); - } - } - } - _opts.stats->blocks_load += 1; _opts.stats->raw_rows_read += _current_batch_rows_read; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org