This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 56058623f6f [fix](expr) Re fix BE core dump while common expr filter 
delete condition column (#29328)
56058623f6f is described below

commit 56058623f6fa4e11cf735e079570b45d7b4309af
Author: Xinyi Zou <zouxiny...@gmail.com>
AuthorDate: Sat Dec 30 08:40:03 2023 +0800

    [fix](expr) Re fix BE core dump while common expr filter delete condition 
column (#29328)
    
    Additional deleted filter condition will be materialized column be at the 
end of the block,
    after _output_column_by_sel_idx will be erase, we not need to filter it,
    so erase it from _columns_to_filter in the first next_batch.
    Eg:
    delete from table where a = 10;
    select b from table;
    a column only effective in segment iterator, the block from query engine 
only contain the b column,
    so no need to filter a column by expr.
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 52 +++++++++-------------
 .../suites/variant_p0/delete_update.groovy         |  1 +
 2 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 5c30423bb53..0cf8ffeffa1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1504,8 +1504,6 @@ Status SegmentIterator::_seek_columns(const 
std::vector<ColumnId>& column_ids, r
 // todo(wb) need a UT here
 Status SegmentIterator::_vec_init_lazy_materialization() {
     _is_pred_column.resize(_schema->columns().size(), false);
-    std::vector<bool> is_pred_column_no_del_condition;
-    is_pred_column_no_del_condition.resize(_schema->columns().size(), false);
 
     // including short/vec/delete pred
     std::set<ColumnId> pred_column_ids;
@@ -1547,7 +1545,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
         for (auto predicate : _col_predicates) {
             auto cid = predicate->column_id();
             _is_pred_column[cid] = true;
-            is_pred_column_no_del_condition[cid] = true;
             pred_column_ids.insert(cid);
 
             // check pred using short eval or vec eval
@@ -1601,16 +1598,10 @@ Status 
SegmentIterator::_vec_init_lazy_materialization() {
         if (!_common_expr_columns.empty()) {
             _is_need_expr_eval = true;
             for (auto cid : _schema->column_ids()) {
-                // pred column also needs to be filtered by expr, exclude 
delete condition column,
-                // Delete condition column not need to be filtered, query 
engine does not need it,
-                // after _output_column_by_sel_idx, delete condition 
materialize column will be erase
-                // at the end of the block.
-                // Eg:
-                //      `delete from table where a = 10;`
-                //      `select b from table;`
-                // a column only effective in segment iterator, the block from 
query engine only contain the b column,
-                // so no need to filter a column by expr.
-                if (_is_common_expr_column[cid] || 
is_pred_column_no_del_condition[cid]) {
+                // pred column also needs to be filtered by expr, exclude 
additional delete condition column.
+                // if delete condition column not in the block, no filter is 
needed
+                // and will be removed from _columns_to_filter in the first 
next_batch.
+                if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
                     auto loc = _schema_block_id_map[cid];
                     _columns_to_filter.push_back(loc);
                 }
@@ -2171,6 +2162,22 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                 _current_return_columns[cid]->reserve(_opts.block_row_max);
             }
         }
+
+        // Additional deleted filter condition will be materialized column be 
at the end of the block,
+        // after _output_column_by_sel_idx  will be erase, we not need to 
filter it,
+        // so erase it from _columns_to_filter in the first next_batch.
+        // Eg:
+        //      `delete from table where a = 10;`
+        //      `select b from table;`
+        // a column only effective in segment iterator, the block from query 
engine only contain the b column,
+        // so no need to filter a column by expr.
+        for (auto it = _columns_to_filter.begin(); it != 
_columns_to_filter.end();) {
+            if (*it >= block->columns()) {
+                it = _columns_to_filter.erase(it);
+            } else {
+                ++it;
+            }
+        }
     }
     _init_current_block(block, _current_return_columns);
     _converted_column_ids.assign(_schema->columns().size(), 0);
@@ -2185,25 +2192,6 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
         _replace_version_col(_current_batch_rows_read);
     }
 
-    // If col >= block->columns(), it means col should not be filtered, there 
is a BUG.
-    // such as delete condition column was incorrectly put into 
columns_to_filter,
-    // which is usually at the end of the block. only check during the first 
next_batch.
-    if (_opts.stats->blocks_load == 0) {
-        for (const auto& col : _columns_to_filter) {
-            if (col >= block->columns()) {
-                std::ostringstream ss;
-                for (const auto& i : _columns_to_filter) {
-                    ss << i << "-";
-                }
-                throw Exception(
-                        ErrorCode::INTERNAL_ERROR,
-                        "filter block column id(index) greater than 
block->columns(), "
-                        "column id={}, all columns that need filter={}, block 
columns num={}",
-                        col, ss.str().substr(0, ss.str().length() - 1), 
block->columns());
-            }
-        }
-    }
-
     _opts.stats->blocks_load += 1;
     _opts.stats->raw_rows_read += _current_batch_rows_read;
 
diff --git a/regression-test/suites/variant_p0/delete_update.groovy 
b/regression-test/suites/variant_p0/delete_update.groovy
index e0cfcfa82f0..bbd999559b4 100644
--- a/regression-test/suites/variant_p0/delete_update.groovy
+++ b/regression-test/suites/variant_p0/delete_update.groovy
@@ -63,6 +63,7 @@ suite("regression_test_variant_delete_and_update", 
"variant_type"){
     sql """update ${table_name} set v = '{"updated_value" : 1111}' where k = 
7"""
     qt_sql "select * from ${table_name} order by k"
 
+    sql """delete from ${table_name} where v = 'xxx' or vs = 'yyy'"""
     sql """delete from ${table_name} where vs = 'xxx' or vs = 'yyy'"""
     qt_sql "select * from ${table_name} order by k"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to