This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e0cd8599d2 [fix](delete) fix delete from bug which can get wrong 
result (#17146)
e0cd8599d2 is described below

commit e0cd8599d2c8ddfcb3162f77609892ef3fefd78c
Author: xueweizhang <zxw520bl...@163.com>
AuthorDate: Tue Feb 28 09:20:10 2023 +0800

    [fix](delete) fix delete from bug which can get wrong result (#17146)
    
    理论上,如果是两次独立的删除,比如delete from table where a=1; delete from table where 
a=2;其实这个地方应该可以使用的,但是目前的代码,是把所有不同版本的delete predicates和不同列的delete 
predicates都放到一起了,失去了版本信息、失去了谓词间可能是and的关系,统一弱化成了delete predicates都是独立的,有一个delete 
predicates满足条件,就把page都去掉。
    这个pr的修改方式,就是在当前代码的基础上,当只有一个delete predicate的时候才能保证后续淘汰page的正确性,所以这里一律加了 == 
1的判断才传递delete predicates。
    如果要把不同版本的delete predicates和不同列的delete 
predicates作为完整和严谨的逻辑去判断page,需要修改的设计就有点多了,目前的方案算是一种优先解决bug的思路,后续可以进一步把delete 
predicates这块加速zone判断进行page淘汰的逻辑完善,提高delete predicates使用的场景。
---
 be/src/olap/delete_handler.cpp                     |  35 ++-
 be/src/olap/delete_handler.h                       |   2 +-
 be/src/olap/iterators.h                            |   2 +-
 be/src/olap/rowset/beta_rowset_reader.cpp          |   4 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |   2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |   4 +-
 .../data/delete_p0/test_zone_map_delete.out        | 286 +++++++++++++++++++++
 .../suites/delete_p0/test_zone_map_delete.groovy   |  97 +++++++
 8 files changed, 405 insertions(+), 27 deletions(-)

diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 0b755ae890..313d48dfda 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -311,8 +311,8 @@ void DeleteHandler::finalize() {
 
 void DeleteHandler::get_delete_conditions_after_version(
         int64_t version, AndBlockColumnPredicate* 
and_block_column_predicate_ptr,
-        std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* 
col_id_to_del_predicates)
-        const {
+        std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
+                del_predicates_for_zone_map) const {
     for (auto& del_cond : _del_conds) {
         if (del_cond.filter_version > version) {
             // now, only query support delete column predicate operator
@@ -322,33 +322,28 @@ void DeleteHandler::get_delete_conditions_after_version(
                             new 
SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]);
                     and_block_column_predicate_ptr->add_column_predicate(
                             single_column_block_predicate);
-                    if (col_id_to_del_predicates->count(
+                    if (del_predicates_for_zone_map->count(
                                 del_cond.column_predicate_vec[0]->column_id()) 
< 1) {
-                        col_id_to_del_predicates->insert(
+                        del_predicates_for_zone_map->insert(
                                 {del_cond.column_predicate_vec[0]->column_id(),
                                  std::vector<const ColumnPredicate*> {}});
                     }
-                    
(*col_id_to_del_predicates)[del_cond.column_predicate_vec[0]->column_id()]
+                    
(*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()]
                             .push_back(del_cond.column_predicate_vec[0]);
                 } else {
                     auto or_column_predicate = new OrBlockColumnPredicate();
 
                     // build or_column_predicate
-                    std::for_each(
-                            del_cond.column_predicate_vec.cbegin(),
-                            del_cond.column_predicate_vec.cend(),
-                            [&or_column_predicate,
-                             col_id_to_del_predicates](const ColumnPredicate* 
predicate) {
-                                if 
(col_id_to_del_predicates->count(predicate->column_id()) < 1) {
-                                    col_id_to_del_predicates->insert(
-                                            {predicate->column_id(),
-                                             std::vector<const 
ColumnPredicate*> {}});
-                                }
-                                
(*col_id_to_del_predicates)[predicate->column_id()].push_back(
-                                        predicate);
-                                or_column_predicate->add_column_predicate(
-                                        new 
SingleColumnBlockPredicate(predicate));
-                            });
+                    // when delete from where a = 1 and b = 2, we can not use 
del_predicates_for_zone_map to filter zone page,
+                    // so here do not put predicate to 
del_predicates_for_zone_map,
+                    // refer #17145 for more details.
+                    // // TODO: need refactor design and code to use more 
version delete and more column delete to filter zone page.
+                    std::for_each(del_cond.column_predicate_vec.cbegin(),
+                                  del_cond.column_predicate_vec.cend(),
+                                  [&or_column_predicate](const 
ColumnPredicate* predicate) {
+                                      
or_column_predicate->add_column_predicate(
+                                              new 
SingleColumnBlockPredicate(predicate));
+                                  });
                     
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
                 }
             }
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 815635ec53..0b33d638e1 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -101,7 +101,7 @@ public:
     void get_delete_conditions_after_version(
             int64_t version, AndBlockColumnPredicate* 
and_block_column_predicate_ptr,
             std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
-                    col_id_to_del_predicates) const;
+                    del_predicates_for_zone_map) const;
 
 private:
     // Use regular expression to extract 'column_name', 'op' and 'operands'
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index bb17a1fecf..58fd8f3ef7 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -101,7 +101,7 @@ public:
     std::vector<ColumnPredicate*> column_predicates;
     std::vector<ColumnPredicate*> column_predicates_except_leafnode_of_andnode;
     std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>> 
col_id_to_predicates;
-    std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> 
col_id_to_del_predicates;
+    std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> 
del_predicates_for_zone_map;
     TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
 
     // REQUIRED (null is not allowed)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp 
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 01d86965e5..e57bda9fea 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -38,7 +38,7 @@ void BetaRowsetReader::reset_read_options() {
     _read_options.delete_condition_predicates = 
std::make_shared<AndBlockColumnPredicate>();
     _read_options.column_predicates.clear();
     _read_options.col_id_to_predicates.clear();
-    _read_options.col_id_to_del_predicates.clear();
+    _read_options.del_predicates_for_zone_map.clear();
     _read_options.key_ranges.clear();
 }
 
@@ -83,7 +83,7 @@ Status 
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     if (read_context->delete_handler != nullptr) {
         read_context->delete_handler->get_delete_conditions_after_version(
                 _rowset->end_version(), 
_read_options.delete_condition_predicates.get(),
-                &_read_options.col_id_to_del_predicates);
+                &_read_options.del_predicates_for_zone_map);
     }
 
     std::vector<uint32_t> read_columns;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index c0f4d7fa89..ea0e6b44cd 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -129,7 +129,7 @@ Status Segment::new_iterator(const Schema& schema, const 
StorageReadOptions& rea
     }
 
     RETURN_IF_ERROR(load_index());
-    if (read_options.col_id_to_del_predicates.empty() &&
+    if (read_options.delete_condition_predicates->num_of_column_predicate() == 
0 &&
         read_options.push_down_agg_type_opt != TPushAggOp::NONE) {
         
iter->reset(vectorized::new_vstatistics_iterator(this->shared_from_this(), 
schema));
     } else {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 73feeb3539..e75a48ac8a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -380,8 +380,8 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
         DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
         
RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->get_row_ranges_by_zone_map(
                 _opts.col_id_to_predicates[cid].get(),
-                _opts.col_id_to_del_predicates.count(cid) > 0
-                        ? &(_opts.col_id_to_del_predicates[cid])
+                _opts.del_predicates_for_zone_map.count(cid) > 0
+                        ? &(_opts.del_predicates_for_zone_map[cid])
                         : nullptr,
                 &column_row_ranges));
         // intersect different columns's row ranges to get final row ranges by 
zone map
diff --git a/regression-test/data/delete_p0/test_zone_map_delete.out 
b/regression-test/data/delete_p0/test_zone_map_delete.out
index d119d5e8de..ba300220f4 100644
--- a/regression-test/data/delete_p0/test_zone_map_delete.out
+++ b/regression-test/data/delete_p0/test_zone_map_delete.out
@@ -57,3 +57,289 @@
 
 -- !sql --
 
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+1      11
+1      22
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      11
+1      22
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      11
+1      22
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+
+-- !sql --
+1      22
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      22
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      33
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      44
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      55
+1      66
+1      77
+1      88
+1      99
+1      100
+1      101
+1      102
+1      111
+1      122
+1      133
+1      144
+1      155
+1      166
+1      177
+1      188
+1      199
+1      200
+1      201
+1      202
+
+-- !sql --
+1      201
+1      202
+
+-- !sql --
+1      201
+
+-- !sql --
+1      11
+1      22
+
+-- !sql --
+1      11
+
diff --git a/regression-test/suites/delete_p0/test_zone_map_delete.groovy 
b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
index 5e8beb375d..f79830c815 100644
--- a/regression-test/suites/delete_p0/test_zone_map_delete.groovy
+++ b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
@@ -54,4 +54,101 @@ suite("test_zone_map_delete") {
     qt_sql """select * from ${tableName} ORDER BY k1;"""
 
     sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    // =========================
+    sql """
+    CREATE TABLE IF NOT EXISTS ${tableName} (
+        `k1` bigint(20) NULL,
+        `k2` largeint(40) NULL,
+        `k3` largeint(40) NULL
+    ) ENGINE=OLAP
+    AGGREGATE KEY(`k1`, `k2`, `k3`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "in_memory" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "true"
+    );
+    """
+
+    sql """ truncate table ${tableName}; """
+
+    sql """ insert into ${tableName} 
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+    sql """ delete from ${tableName} where k2=0;"""
+
+    sql """ delete from ${tableName} where k2=1;"""
+
+    qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
+
+
+    sql """ truncate table ${tableName}; """
+
+    sql """ insert into ${tableName} 
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+    sql """ delete from ${tableName} where k2=1 and k2=0;"""
+
+    qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
+
+
+    sql """ truncate table ${tableName}; """
+
+    sql """ insert into ${tableName} 
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+    sql """ delete from ${tableName} where k2 is null and k3=00;"""
+
+    qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """ select k2,k3 from ${tableName} where k2 is null ORDER BY k3;"""
+
+    sql """delete from ${tableName} where k2 is not null and k3=11;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k2 is not null ORDER BY 
k3;"""
+
+    sql """delete from ${tableName} where k2=1 and k3=22;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k2=1 ORDER BY k3;"""
+
+    sql """delete from ${tableName} where k2!=0 and k3=33;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k2!=0 ORDER BY k3;"""
+
+    sql """delete from ${tableName} where k2 not in (0) and k3=44;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k2 not in (0) ORDER BY 
k3;"""
+
+    sql """delete from ${tableName} where k2=1 and k3 >= 11 and k3 <=200;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k3 = 201 ORDER BY k3;"""
+
+
+    sql """truncate table ${tableName};"""
+
+    sql """insert into ${tableName} 
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+    sql """delete from ${tableName} where k2=1 and k3 <=202 and k3 >= 33;"""
+
+    qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+    qt_sql """select k2,k3 from ${tableName} where k3 = 11 ORDER BY k3;"""
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to