This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 56058623f6f [fix](expr) Re fix BE core dump while common expr filter delete condition column (#29328) 56058623f6f is described below commit 56058623f6fa4e11cf735e079570b45d7b4309af Author: Xinyi Zou <zouxiny...@gmail.com> AuthorDate: Sat Dec 30 08:40:03 2023 +0800 [fix](expr) Re fix BE core dump while common expr filter delete condition column (#29328) Additional deleted filter condition will be materialized column be at the end of the block, after _output_column_by_sel_idx will be erase, we not need to filter it, so erase it from _columns_to_filter in the first next_batch. Eg: delete from table where a = 10; select b from table; a column only effective in segment iterator, the block from query engine only contain the b column, so no need to filter a column by expr. --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 52 +++++++++------------- .../suites/variant_p0/delete_update.groovy | 1 + 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 5c30423bb53..0cf8ffeffa1 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1504,8 +1504,6 @@ Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, r // todo(wb) need a UT here Status SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column.resize(_schema->columns().size(), false); - std::vector<bool> is_pred_column_no_del_condition; - is_pred_column_no_del_condition.resize(_schema->columns().size(), false); // including short/vec/delete pred std::set<ColumnId> pred_column_ids; @@ -1547,7 +1545,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() { for (auto predicate : _col_predicates) { auto cid = predicate->column_id(); _is_pred_column[cid] = true; - is_pred_column_no_del_condition[cid] = true; pred_column_ids.insert(cid); // check pred using short eval or vec eval @@ -1601,16 +1598,10 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema->column_ids()) { - // pred column also needs to be filtered by expr, exclude delete condition column, - // Delete condition column not need to be filtered, query engine does not need it, - // after _output_column_by_sel_idx, delete condition materialize column will be erase - // at the end of the block. - // Eg: - // `delete from table where a = 10;` - // `select b from table;` - // a column only effective in segment iterator, the block from query engine only contain the b column, - // so no need to filter a column by expr. - if (_is_common_expr_column[cid] || is_pred_column_no_del_condition[cid]) { + // pred column also needs to be filtered by expr, exclude additional delete condition column. + // if delete condition column not in the block, no filter is needed + // and will be removed from _columns_to_filter in the first next_batch. + if (_is_common_expr_column[cid] || _is_pred_column[cid]) { auto loc = _schema_block_id_map[cid]; _columns_to_filter.push_back(loc); } @@ -2171,6 +2162,22 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _current_return_columns[cid]->reserve(_opts.block_row_max); } } + + // Additional deleted filter condition will be materialized column be at the end of the block, + // after _output_column_by_sel_idx will be erase, we not need to filter it, + // so erase it from _columns_to_filter in the first next_batch. + // Eg: + // `delete from table where a = 10;` + // `select b from table;` + // a column only effective in segment iterator, the block from query engine only contain the b column, + // so no need to filter a column by expr. + for (auto it = _columns_to_filter.begin(); it != _columns_to_filter.end();) { + if (*it >= block->columns()) { + it = _columns_to_filter.erase(it); + } else { + ++it; + } + } } _init_current_block(block, _current_return_columns); _converted_column_ids.assign(_schema->columns().size(), 0); @@ -2185,25 +2192,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _replace_version_col(_current_batch_rows_read); } - // If col >= block->columns(), it means col should not be filtered, there is a BUG. - // such as delete condition column was incorrectly put into columns_to_filter, - // which is usually at the end of the block. only check during the first next_batch. - if (_opts.stats->blocks_load == 0) { - for (const auto& col : _columns_to_filter) { - if (col >= block->columns()) { - std::ostringstream ss; - for (const auto& i : _columns_to_filter) { - ss << i << "-"; - } - throw Exception( - ErrorCode::INTERNAL_ERROR, - "filter block column id(index) greater than block->columns(), " - "column id={}, all columns that need filter={}, block columns num={}", - col, ss.str().substr(0, ss.str().length() - 1), block->columns()); - } - } - } - _opts.stats->blocks_load += 1; _opts.stats->raw_rows_read += _current_batch_rows_read; diff --git a/regression-test/suites/variant_p0/delete_update.groovy b/regression-test/suites/variant_p0/delete_update.groovy index e0cfcfa82f0..bbd999559b4 100644 --- a/regression-test/suites/variant_p0/delete_update.groovy +++ b/regression-test/suites/variant_p0/delete_update.groovy @@ -63,6 +63,7 @@ suite("regression_test_variant_delete_and_update", "variant_type"){ sql """update ${table_name} set v = '{"updated_value" : 1111}' where k = 7""" qt_sql "select * from ${table_name} order by k" + sql """delete from ${table_name} where v = 'xxx' or vs = 'yyy'""" sql """delete from ${table_name} where vs = 'xxx' or vs = 'yyy'""" qt_sql "select * from ${table_name} order by k" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org