This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e0cd8599d2 [fix](delete) fix delete from bug which can get wrong result (#17146) e0cd8599d2 is described below commit e0cd8599d2c8ddfcb3162f77609892ef3fefd78c Author: xueweizhang <zxw520bl...@163.com> AuthorDate: Tue Feb 28 09:20:10 2023 +0800 [fix](delete) fix delete from bug which can get wrong result (#17146) 理论上,如果是两次独立的删除,比如delete from table where a=1; delete from table where a=2;其实这个地方应该可以使用的,但是目前的代码,是把所有不同版本的delete predicates和不同列的delete predicates都放到一起了,失去了版本信息、失去了谓词间可能是and的关系,统一弱化成了delete predicates都是独立的,有一个delete predicates满足条件,就把page都去掉。 这个pr的修改方式,就是在当前代码的基础上,当只有一个delete predicate的时候才能保证后续淘汰page的正确性,所以这里一律加了 == 1的判断才传递delete predicates。 如果要把不同版本的delete predicates和不同列的delete predicates作为完整和严谨的逻辑去判断page,需要修改的设计就有点多了,目前的方案算是一种优先解决bug的思路,后续可以进一步把delete predicates这块加速zone判断进行page淘汰的逻辑完善,提高delete predicates使用的场景。 --- be/src/olap/delete_handler.cpp | 35 ++- be/src/olap/delete_handler.h | 2 +- be/src/olap/iterators.h | 2 +- be/src/olap/rowset/beta_rowset_reader.cpp | 4 +- be/src/olap/rowset/segment_v2/segment.cpp | 2 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +- .../data/delete_p0/test_zone_map_delete.out | 286 +++++++++++++++++++++ .../suites/delete_p0/test_zone_map_delete.groovy | 97 +++++++ 8 files changed, 405 insertions(+), 27 deletions(-) diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 0b755ae890..313d48dfda 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -311,8 +311,8 @@ void DeleteHandler::finalize() { void DeleteHandler::get_delete_conditions_after_version( int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, - std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* col_id_to_del_predicates) - const { + std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* + del_predicates_for_zone_map) const { for (auto& del_cond : _del_conds) { if (del_cond.filter_version > version) { // now, only query support delete column predicate operator @@ -322,33 +322,28 @@ void DeleteHandler::get_delete_conditions_after_version( new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]); and_block_column_predicate_ptr->add_column_predicate( single_column_block_predicate); - if (col_id_to_del_predicates->count( + if (del_predicates_for_zone_map->count( del_cond.column_predicate_vec[0]->column_id()) < 1) { - col_id_to_del_predicates->insert( + del_predicates_for_zone_map->insert( {del_cond.column_predicate_vec[0]->column_id(), std::vector<const ColumnPredicate*> {}}); } - (*col_id_to_del_predicates)[del_cond.column_predicate_vec[0]->column_id()] + (*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()] .push_back(del_cond.column_predicate_vec[0]); } else { auto or_column_predicate = new OrBlockColumnPredicate(); // build or_column_predicate - std::for_each( - del_cond.column_predicate_vec.cbegin(), - del_cond.column_predicate_vec.cend(), - [&or_column_predicate, - col_id_to_del_predicates](const ColumnPredicate* predicate) { - if (col_id_to_del_predicates->count(predicate->column_id()) < 1) { - col_id_to_del_predicates->insert( - {predicate->column_id(), - std::vector<const ColumnPredicate*> {}}); - } - (*col_id_to_del_predicates)[predicate->column_id()].push_back( - predicate); - or_column_predicate->add_column_predicate( - new SingleColumnBlockPredicate(predicate)); - }); + // when delete from where a = 1 and b = 2, we can not use del_predicates_for_zone_map to filter zone page, + // so here do not put predicate to del_predicates_for_zone_map, + // refer #17145 for more details. + // // TODO: need refactor design and code to use more version delete and more column delete to filter zone page. + std::for_each(del_cond.column_predicate_vec.cbegin(), + del_cond.column_predicate_vec.cend(), + [&or_column_predicate](const ColumnPredicate* predicate) { + or_column_predicate->add_column_predicate( + new SingleColumnBlockPredicate(predicate)); + }); and_block_column_predicate_ptr->add_column_predicate(or_column_predicate); } } diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index 815635ec53..0b33d638e1 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -101,7 +101,7 @@ public: void get_delete_conditions_after_version( int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* - col_id_to_del_predicates) const; + del_predicates_for_zone_map) const; private: // Use regular expression to extract 'column_name', 'op' and 'operands' diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index bb17a1fecf..58fd8f3ef7 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -101,7 +101,7 @@ public: std::vector<ColumnPredicate*> column_predicates; std::vector<ColumnPredicate*> column_predicates_except_leafnode_of_andnode; std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>> col_id_to_predicates; - std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> col_id_to_del_predicates; + std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> del_predicates_for_zone_map; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; // REQUIRED (null is not allowed) diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 01d86965e5..e57bda9fea 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -38,7 +38,7 @@ void BetaRowsetReader::reset_read_options() { _read_options.delete_condition_predicates = std::make_shared<AndBlockColumnPredicate>(); _read_options.column_predicates.clear(); _read_options.col_id_to_predicates.clear(); - _read_options.col_id_to_del_predicates.clear(); + _read_options.del_predicates_for_zone_map.clear(); _read_options.key_ranges.clear(); } @@ -83,7 +83,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context if (read_context->delete_handler != nullptr) { read_context->delete_handler->get_delete_conditions_after_version( _rowset->end_version(), _read_options.delete_condition_predicates.get(), - &_read_options.col_id_to_del_predicates); + &_read_options.del_predicates_for_zone_map); } std::vector<uint32_t> read_columns; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index c0f4d7fa89..ea0e6b44cd 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -129,7 +129,7 @@ Status Segment::new_iterator(const Schema& schema, const StorageReadOptions& rea } RETURN_IF_ERROR(load_index()); - if (read_options.col_id_to_del_predicates.empty() && + if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 && read_options.push_down_agg_type_opt != TPushAggOp::NONE) { iter->reset(vectorized::new_vstatistics_iterator(this->shared_from_this(), schema)); } else { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 73feeb3539..e75a48ac8a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -380,8 +380,8 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row DCHECK(_opts.col_id_to_predicates.count(cid) > 0); RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->get_row_ranges_by_zone_map( _opts.col_id_to_predicates[cid].get(), - _opts.col_id_to_del_predicates.count(cid) > 0 - ? &(_opts.col_id_to_del_predicates[cid]) + _opts.del_predicates_for_zone_map.count(cid) > 0 + ? &(_opts.del_predicates_for_zone_map[cid]) : nullptr, &column_row_ranges)); // intersect different columns's row ranges to get final row ranges by zone map diff --git a/regression-test/data/delete_p0/test_zone_map_delete.out b/regression-test/data/delete_p0/test_zone_map_delete.out index d119d5e8de..ba300220f4 100644 --- a/regression-test/data/delete_p0/test_zone_map_delete.out +++ b/regression-test/data/delete_p0/test_zone_map_delete.out @@ -57,3 +57,289 @@ -- !sql -- +-- !sql -- + +-- !sql -- + +-- !sql -- +1 11 +1 22 +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 11 +1 22 +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 11 +1 22 +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- + +-- !sql -- +1 22 +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 22 +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 33 +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 44 +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 55 +1 66 +1 77 +1 88 +1 99 +1 100 +1 101 +1 102 +1 111 +1 122 +1 133 +1 144 +1 155 +1 166 +1 177 +1 188 +1 199 +1 200 +1 201 +1 202 + +-- !sql -- +1 201 +1 202 + +-- !sql -- +1 201 + +-- !sql -- +1 11 +1 22 + +-- !sql -- +1 11 + diff --git a/regression-test/suites/delete_p0/test_zone_map_delete.groovy b/regression-test/suites/delete_p0/test_zone_map_delete.groovy index 5e8beb375d..f79830c815 100644 --- a/regression-test/suites/delete_p0/test_zone_map_delete.groovy +++ b/regression-test/suites/delete_p0/test_zone_map_delete.groovy @@ -54,4 +54,101 @@ suite("test_zone_map_delete") { qt_sql """select * from ${tableName} ORDER BY k1;""" sql """ DROP TABLE IF EXISTS ${tableName} """ + + // ========================= + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` bigint(20) NULL, + `k2` largeint(40) NULL, + `k3` largeint(40) NULL + ) ENGINE=OLAP + AGGREGATE KEY(`k1`, `k2`, `k3`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "true" + ); + """ + + sql """ truncate table ${tableName}; """ + + sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);""" + + sql """ delete from ${tableName} where k2=0;""" + + sql """ delete from ${tableName} where k2=1;""" + + qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;""" + + + sql """ truncate table ${tableName}; """ + + sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);""" + + sql """ delete from ${tableName} where k2=1 and k2=0;""" + + qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;""" + + + sql """ truncate table ${tableName}; """ + + sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);""" + + sql """ delete from ${tableName} where k2 is null and k3=00;""" + + qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """ select k2,k3 from ${tableName} where k2 is null ORDER BY k3;""" + + sql """delete from ${tableName} where k2 is not null and k3=11;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k2 is not null ORDER BY k3;""" + + sql """delete from ${tableName} where k2=1 and k3=22;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k2=1 ORDER BY k3;""" + + sql """delete from ${tableName} where k2!=0 and k3=33;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k2!=0 ORDER BY k3;""" + + sql """delete from ${tableName} where k2 not in (0) and k3=44;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k2 not in (0) ORDER BY k3;""" + + sql """delete from ${tableName} where k2=1 and k3 >= 11 and k3 <=200;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k3 = 201 ORDER BY k3;""" + + + sql """truncate table ${tableName};""" + + sql """insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);""" + + sql """delete from ${tableName} where k2=1 and k3 <=202 and k3 >= 33;""" + + qt_sql """select k2,k3 from ${tableName} ORDER BY k3;""" + + qt_sql """select k2,k3 from ${tableName} where k3 = 11 ORDER BY k3;""" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org