This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1b73bda32df [fix](expr) Fix BE core dump while common expr filter 
delete condition column (#29107) (#29208)
1b73bda32df is described below

commit 1b73bda32dfa83d58dc3e5cf3bb3cb5dbada05f0
Author: Xinyi Zou <zouxiny...@gmail.com>
AuthorDate: Thu Dec 28 15:00:18 2023 +0800

    [fix](expr) Fix BE core dump while common expr filter delete condition 
column (#29107) (#29208)
    
    pred column also needs to be filtered by expr, exclude delete condition 
column, delete condition column not need to be filtered, query engine does not 
need it, after _output_column_by_sel_idx, delete condition materialize column 
will be erase at the end of the block.
    Eg:
    delete from table where a = 10;
    select b from table;
    a column only effective in segment iterator, the block from query engine 
only contain the b column, so no need to filter a column by expr.
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 34 ++++++++++++-
 .../data/correctness/test_pushdown_common_expr.out | 18 +++++++
 .../correctness/test_pushdown_common_expr.groovy   | 58 ++++++++++++++++++++++
 3 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 44ff03c9671..d41b18609a4 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1454,6 +1454,8 @@ Status SegmentIterator::_seek_columns(const 
std::vector<ColumnId>& column_ids, r
 // todo(wb) need a UT here
 Status SegmentIterator::_vec_init_lazy_materialization() {
     _is_pred_column.resize(_schema->columns().size(), false);
+    std::vector<bool> is_pred_column_no_del_condition;
+    is_pred_column_no_del_condition.resize(_schema->columns().size(), false);
 
     // including short/vec/delete pred
     std::set<ColumnId> pred_column_ids;
@@ -1495,6 +1497,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
         for (auto predicate : _col_predicates) {
             auto cid = predicate->column_id();
             _is_pred_column[cid] = true;
+            is_pred_column_no_del_condition[cid] = true;
             pred_column_ids.insert(cid);
 
             // check pred using short eval or vec eval
@@ -1548,8 +1551,16 @@ Status SegmentIterator::_vec_init_lazy_materialization() 
{
         if (!_common_expr_columns.empty()) {
             _is_need_expr_eval = true;
             for (auto cid : _schema->column_ids()) {
-                // pred column also needs to be filtered by expr
-                if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
+                // pred column also needs to be filtered by expr, exclude 
delete condition column,
+                // Delete condition column not need to be filtered, query 
engine does not need it,
+                // after _output_column_by_sel_idx, delete condition 
materialize column will be erase
+                // at the end of the block.
+                // Eg:
+                //      `delete from table where a = 10;`
+                //      `select b from table;`
+                // a column only effective in segment iterator, the block from 
query engine only contain the b column,
+                // so no need to filter a column by expr.
+                if (_is_common_expr_column[cid] || 
is_pred_column_no_del_condition[cid]) {
                     auto loc = _schema_block_id_map[cid];
                     _columns_to_filter.push_back(loc);
                 }
@@ -2040,6 +2051,25 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
         _replace_version_col(_current_batch_rows_read);
     }
 
+    // If col >= block->columns(), it means col should not be filtered, there 
is a BUG.
+    // such as delete condition column was incorrectly put into 
columns_to_filter,
+    // which is usually at the end of the block. only check during the first 
next_batch.
+    if (_opts.stats->blocks_load == 0) {
+        for (const auto& col : _columns_to_filter) {
+            if (col >= block->columns()) {
+                std::ostringstream ss;
+                for (const auto& i : _columns_to_filter) {
+                    ss << i << "-";
+                }
+                throw Exception(
+                        ErrorCode::INTERNAL_ERROR,
+                        "filter block column id(index) greater than 
block->columns(), "
+                        "column id={}, all columns that need filter={}, block 
columns num={}",
+                        col, ss.str().substr(0, ss.str().length() - 1), 
block->columns());
+            }
+        }
+    }
+
     _opts.stats->blocks_load += 1;
     _opts.stats->raw_rows_read += _current_batch_rows_read;
 
diff --git a/regression-test/data/correctness/test_pushdown_common_expr.out 
b/regression-test/data/correctness/test_pushdown_common_expr.out
index 8da380e841b..0055c93b7d8 100644
--- a/regression-test/data/correctness/test_pushdown_common_expr.out
+++ b/regression-test/data/correctness/test_pushdown_common_expr.out
@@ -71,3 +71,21 @@
 
 -- !9 --
 
+-- !1 --
+0
+
+-- !2 --
+0
+
+-- !3 --
+0
+
+-- !4 --
+0
+
+-- !5 --
+0
+
+-- !6 --
+a
+
diff --git 
a/regression-test/suites/correctness/test_pushdown_common_expr.groovy 
b/regression-test/suites/correctness/test_pushdown_common_expr.groovy
index 2395f057b1d..aa0e68e3e4c 100644
--- a/regression-test/suites/correctness/test_pushdown_common_expr.groovy
+++ b/regression-test/suites/correctness/test_pushdown_common_expr.groovy
@@ -150,4 +150,62 @@ suite("test_pushdown_common_expr") {
             WHERE random() = 0
         ) AS t2 ON t1.c1 = t2.c1
     """
+
+    sql """ DROP TABLE IF EXISTS t_pushdown_common_expr_for_del """
+    sql """
+    CREATE TABLE `t_pushdown_common_expr_for_del` (
+        `c1` int(11) NULL,
+        `c2` varchar(100) NULL COMMENT "",
+        `c3` varchar(100) NULL COMMENT "",
+        `c4` varchar(100) NULL COMMENT "",
+        `c5` varchar(100) NULL COMMENT "",
+        `c6` varchar(100) NULL COMMENT "",
+        `c7` varchar(100) NULL COMMENT ""
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`c1`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`c1`) BUCKETS 1
+    PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+    );
+    """
+
+    sql """
+        INSERT INTO t_pushdown_common_expr_for_del VALUES
+        (1,'a','aa','aaa','aaaa','aaaaa','aaaaaa'),
+        (2,'b','bb','bbb','bbbb','bbbbb','bbbbbb'),
+        (3,'c','cc','ccc','cccc','ccccc','cccccc'),
+        (4,'d','dd','ddd','dddd','ddddd','dddddd'),
+        (5,'e','ee','eee','eeee','eeeee','eeeeee'),
+        (6,'f','ff','fff','ffff','fffff','ffffff'),
+        (7,'g','gg','ggg','gggg','ggggg','gggggg');
+     """
+
+    sql """set enable_common_expr_pushdown=true"""
+
+    // delete condition columns num > block columns num
+    order_qt_1 """
+        delete from t_pushdown_common_expr_for_del where c3 = "bb";
+    """
+
+    order_qt_2 """
+        delete from t_pushdown_common_expr_for_del where c4 = "ccc";
+    """
+
+    order_qt_3 """
+        delete from t_pushdown_common_expr_for_del where c5 = "dddd";
+    """
+
+    order_qt_4 """
+        delete from t_pushdown_common_expr_for_del where c6 = "eeeee";
+    """
+
+    order_qt_5 """
+        delete from t_pushdown_common_expr_for_del where c7 = "ffffff";
+    """
+
+    order_qt_6 """
+        select c2 from t_pushdown_common_expr_for_del where upper(c2) = "A";
+    """
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to