This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/master by this push:
     new f1c84265a38 [Bug](predicate) fix wrong result of AcceptNullPredicate (#39497)
f1c84265a38 is described below

commit f1c84265a38a06bbdb0b700e9ef1211eaeaacf3d
Author: Pxl <pxl...@qq.com>
AuthorDate: Tue Aug 20 11:56:55 2024 +0800

    [Bug](predicate) fix wrong result of AcceptNullPredicate (#39497)

    ## Proposed changes

    fix wrong result of AcceptNullPredicate
---
 be/src/olap/accept_null_predicate.h                |  35 +++----
 be/src/olap/shared_predicate.h                     |   4 +-
 .../data/nereids_arith_p0/topn/accept_null.out     |  13 +++
 .../nereids_arith_p0/topn/accept_null.groovy       | 110 +++++++++++++++++++++
 4 files changed, 140 insertions(+), 22 deletions(-)

diff --git a/be/src/olap/accept_null_predicate.h b/be/src/olap/accept_null_predicate.h
index 89d26e2684c..112101f46dd 100644
--- a/be/src/olap/accept_null_predicate.h
+++ b/be/src/olap/accept_null_predicate.h
@@ -34,6 +34,7 @@ namespace doris {
  * but pass (set/return true) for NULL value rows.
  *
  * At parent, it's used for topn runtime predicate.
+ * Eg: original input indexs is '1,2,3,7,8,9' and value of index9 is null, we get nested predicate output index is '1,2,3', but we finally output '1,2,3,9'
  */
 class AcceptNullPredicate : public ColumnPredicate {
     ENABLE_FACTORY_CREATOR(AcceptNullPredicate);
@@ -44,8 +45,6 @@ public:
 
     PredicateType type() const override { return _nested->type(); }
 
-    void set_nested(ColumnPredicate* nested) { _nested.reset(nested); }
-
     Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
                     roaring::Roaring* roaring) const override {
         return _nested->evaluate(iterator, num_rows, roaring);
@@ -64,11 +63,14 @@ public:
     void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
                       bool* flags) const override {
         if (column.has_null()) {
+            std::vector<uint8_t> original_flags(size);
+            memcpy(original_flags.data(), flags, size);
+
             const auto& nullable_col = assert_cast<const vectorized::ColumnNullable&>(column);
             _nested->evaluate_and(nullable_col.get_nested_column(), sel, size, flags);
             const auto& nullmap = nullable_col.get_null_map_data();
             for (uint16_t i = 0; i < size; ++i) {
-                flags[i] |= nullmap[sel[i]];
+                flags[i] |= (original_flags[i] && nullmap[sel[i]]);
             }
         } else {
             _nested->evaluate_and(column, sel, size, flags);
@@ -77,20 +79,7 @@ public:
 
     void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
                      bool* flags) const override {
-        if (column.has_null()) {
-            const auto& nullable_col = assert_cast<const vectorized::ColumnNullable&>(column);
-            _nested->evaluate_or(nullable_col.get_nested_column(), sel, size, flags);
-
-            // call evaluate_or and set true for NULL rows
-            for (uint16_t i = 0; i < size; ++i) {
-                uint16_t idx = sel[i];
-                if (!flags[i] && nullable_col.is_null_at(idx)) {
-                    flags[i] = true;
-                }
-            }
-        } else {
-            _nested->evaluate_or(column, sel, size, flags);
-        }
+        DCHECK(false) << "should not reach here";
     }
 
     bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
@@ -158,6 +147,8 @@ private:
         }
         // create selected_flags
         uint16_t max_idx = sel[size - 1];
+        std::vector<uint16_t> old_sel(size);
+        memcpy(old_sel.data(), sel, sizeof(uint16_t) * size);
 
         const auto& nullable_col = assert_cast<const vectorized::ColumnNullable&>(column);
         // call nested predicate evaluate
@@ -165,14 +156,18 @@ private:
 
         // process NULL values
         if (new_size < size) {
-            std::vector<uint8_t> selected(max_idx + 1);
-            memcpy(selected.data(), nullable_col.get_null_map_data().data(),
-                   (max_idx + 1) * sizeof(bool));
+            std::vector<uint8_t> selected(max_idx + 1, 0);
+            const auto* nullmap = nullable_col.get_null_map_data().data();
             // add rows selected by _nested->evaluate
             for (uint16_t i = 0; i < new_size; ++i) {
                 uint16_t row_idx = sel[i];
                 selected[row_idx] = true;
             }
+            // reset null from original data
+            for (uint16_t i = 0; i < size; ++i) {
+                uint16_t row_idx = old_sel[i];
+                selected[row_idx] |= nullmap[row_idx];
+            }
 
             // recaculate new_size and sel array
             new_size = 0;
diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h
index 41b18e99ba4..2a83f7ef434 100644
--- a/be/src/olap/shared_predicate.h
+++ b/be/src/olap/shared_predicate.h
@@ -167,9 +167,9 @@ private:
     std::string _debug_string() const override {
         std::shared_lock<std::shared_mutex> lock(_mtx);
         if (!_nested) {
-            return "shared_predicate<unknow>";
+            return "shared_predicate(unknow)";
         }
-        return "shared_predicate<" + _nested->debug_string() + ">";
+        return "shared_predicate(" + _nested->debug_string() + ")";
     }
 
     mutable std::shared_mutex _mtx;
diff --git a/regression-test/data/nereids_arith_p0/topn/accept_null.out b/regression-test/data/nereids_arith_p0/topn/accept_null.out
new file mode 100644
index 00000000000..605b9d0b1a9
--- /dev/null
+++ b/regression-test/data/nereids_arith_p0/topn/accept_null.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !test --
+100 dd 100 0
+1000 dd 1000 0
+10000 dd 10000 0
+10001 dd 10001 0
+10002 dd 10002 0
+10003 dd 10003 0
+10004 dd 10004 0
+10005 dd 10005 0
+10006 dd 10006 0
+10007 dd 10007 0
+
diff --git a/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy
new file mode 100644
index 00000000000..09713c76172
--- /dev/null
+++ b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite ("accept_null") {
+    sql """ drop table IF EXISTS detail_tmp;"""
+
+    sql """
+        CREATE TABLE `detail_tmp` (
+            `id` VARCHAR(512) NOT NULL,
+            `accident_no` VARCHAR(512) NULL,
+            `accident_type_name` VARCHAR(512) NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "min_load_replica_num" = "-1",
+            "is_being_synced" = "false",
+            "storage_medium" = "hdd",
+            "storage_format" = "V2",
+            "inverted_index_storage_format" = "V1",
+            "enable_unique_key_merge_on_write" = "true",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false",
+            "group_commit_interval_ms" = "10000",
+            "group_commit_data_bytes" = "134217728",
+            "enable_mow_light_delete" = "false"
+        );
+    """
+
+    sql "insert into detail_tmp(id,accident_type_name,accident_no) select e1,'dd',e1 from (select 1 k1) as t lateral view explode_numbers(100000) tmp1 as e1;"
+    sql "delete from detail_tmp where accident_no <100;"
+
+    def tablets = sql_return_maparray """ show tablets from detail_tmp; """
+
+    // before full compaction, there are 7 rowsets in all tablets.
+    for (def tablet : tablets) {
+        int rowsetCount = 0
+        def (code, out, err) = curl("GET", tablet.CompactionStatus)
+        logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+        assertEquals(code, 0)
+        def tabletJson = parseJson(out.trim())
+        assert tabletJson.rowsets instanceof List
+    }
+
+    // trigger full compactions for all tablets by table id in ${tableName}
+    def backendId_to_backendIP = [:]
+    def backendId_to_backendHttpPort = [:]
+    getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+    boolean disableAutoCompaction = true
+    for(int i=0;i<backendId_to_backendIP.keySet().size();i++){
+        backend_id = backendId_to_backendIP.keySet()[i]
+        def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id))
+        logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+        assertEquals(code, 0)
+        def configList = parseJson(out.trim())
+        assert configList instanceof List
+
+        for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "disable_auto_compaction") {
+                disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+            }
+        }
+    }
+
+    for (def tablet : tablets) {
+        String tablet_id = tablet.TabletId
+        def tablet_info = sql_return_maparray """ show tablet ${tablet_id}; """
+        logger.info("tablet"+tablet_info)
+        def table_id = tablet_info[0].TableId
+        backend_id = tablet.BackendId
+        def times = 1
+        def code, out, err
+        do{
+            (code, out, err) = be_run_full_compaction_by_table_id(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), table_id)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+            ++times
+            sleep(2000)
+        } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10)
+
+        def compactJson = parseJson(out.trim())
+        if (compactJson.status.toLowerCase() == "fail") {
+            assertEquals(disableAutoCompaction, false)
+            logger.info("Compaction was done automatically!")
+        }
+        if (disableAutoCompaction) {
+            assertEquals("success", compactJson.status.toLowerCase())
+        }
+    }
+
+    qt_test "select id,accident_type_name,accident_no,__DORIS_DELETE_SIGN__ From detail_tmp where accident_type_name = 'dd' order by accident_no,id limit 10;"
+}
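To make the headline change concrete: the new class comment says that for an input selection '1,2,3,7,8,9' where row 9 is NULL, the nested predicate keeps only '1,2,3', and AcceptNullPredicate then re-adds the NULL row so the final output is '1,2,3,9'. Below is a minimal, self-contained C++ sketch of that flow; it is not Doris code, and the stand-in nested predicate ("value < 4"), the data, and all helper names are illustrative assumptions.

```cpp
// Sketch of the selection semantics described in the new comment (illustrative only).
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the nested predicate: keep rows whose value is < 4.
// Compacts `sel` in place and returns the new selection size.
static uint16_t nested_evaluate(const std::vector<int>& values, uint16_t* sel, uint16_t size) {
    uint16_t new_size = 0;
    for (uint16_t i = 0; i < size; ++i) {
        uint16_t row_idx = sel[i];
        if (values[row_idx] < 4) {
            sel[new_size++] = row_idx;
        }
    }
    return new_size;
}

// Mirrors the fixed flow: remember the original selection, run the nested predicate,
// then re-add rows that are NULL in the *original* selection.
static uint16_t accept_null_evaluate(const std::vector<int>& values,
                                     const std::vector<uint8_t>& nullmap, uint16_t* sel,
                                     uint16_t size) {
    std::vector<uint16_t> old_sel(sel, sel + size); // snapshot of the incoming selection
    uint16_t max_idx = sel[size - 1];               // sel is assumed sorted ascending

    uint16_t new_size = nested_evaluate(values, sel, size);
    if (new_size == size) {
        return new_size; // nothing was filtered out, nothing to re-add
    }

    std::vector<uint8_t> selected(max_idx + 1, 0);
    for (uint16_t i = 0; i < new_size; ++i) {
        selected[sel[i]] = 1; // rows kept by the nested predicate
    }
    for (uint16_t i = 0; i < size; ++i) {
        selected[old_sel[i]] |= nullmap[old_sel[i]]; // re-add NULL rows from the original input
    }

    new_size = 0;
    for (uint16_t row_idx = 0; row_idx <= max_idx; ++row_idx) {
        if (selected[row_idx]) {
            sel[new_size++] = row_idx;
        }
    }
    return new_size;
}

int main() {
    // Input selection '1,2,3,7,8,9'; row 9 is NULL (the value behind a NULL is arbitrary).
    std::vector<int> values = {5, 1, 2, 3, 5, 5, 5, 7, 8, 100};
    std::vector<uint8_t> nullmap = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
    uint16_t sel[] = {1, 2, 3, 7, 8, 9};
    uint16_t new_size = accept_null_evaluate(values, nullmap, sel, 6);
    for (uint16_t i = 0; i < new_size; ++i) {
        printf("%u ", sel[i]); // prints: 1 2 3 9
    }
    printf("\n");
    return 0;
}
```

Running it prints "1 2 3 9", matching the example in the comment: the nested predicate drops rows 7 and 8, and row 9 survives only because its null bit from the original selection is OR-ed back in.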
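The evaluate_and change follows the same principle in flag form: a NULL row may only stay accepted if its flag was still true before this predicate ran, which is why the patch snapshots the flags and replaces `flags[i] |= nullmap[sel[i]]` with `flags[i] |= (original_flags[i] && nullmap[sel[i]])`. A minimal sketch, again with an assumed stand-in nested predicate and illustrative names only:

```cpp
// Sketch of the evaluate_and fix (illustrative only, not Doris code).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Hypothetical nested predicate: AND "value < 4" into flags for the selected rows.
static void nested_evaluate_and(const std::vector<int>& values, const uint16_t* sel,
                                uint16_t size, bool* flags) {
    for (uint16_t i = 0; i < size; ++i) {
        flags[i] = flags[i] && (values[sel[i]] < 4);
    }
}

// Mirrors the fixed evaluate_and for a nullable column.
static void accept_null_evaluate_and(const std::vector<int>& values,
                                     const std::vector<uint8_t>& nullmap, const uint16_t* sel,
                                     uint16_t size, bool* flags) {
    std::vector<uint8_t> original_flags(size);
    memcpy(original_flags.data(), flags, size); // byte-wise snapshot, as in the patch (1-byte bool)

    nested_evaluate_and(values, sel, size, flags);
    for (uint16_t i = 0; i < size; ++i) {
        // Old (buggy) form: flags[i] |= nullmap[sel[i]];
        flags[i] |= (original_flags[i] && nullmap[sel[i]]);
    }
}

int main() {
    std::vector<int> values = {10, 1, 99};
    std::vector<uint8_t> nullmap = {0, 0, 1};           // row 2 is NULL
    uint16_t sel[] = {0, 1, 2};
    bool flags[] = {true, true, false};                 // row 2 already rejected by an earlier predicate
    accept_null_evaluate_and(values, nullmap, sel, 3, flags);
    printf("%d %d %d\n", flags[0], flags[1], flags[2]); // prints: 0 1 0 (old code would print 0 1 1)
    return 0;
}
```

With the old OR, row 2, already rejected by an earlier predicate, would be flipped back to accepted just because this column is NULL; the fixed version leaves it rejected, which is the wrong-result scenario the commit title refers to.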
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org