This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f1c84265a38 [Bug](predicate) fix wrong result of AcceptNullPredicate 
(#39497)
f1c84265a38 is described below

commit f1c84265a38a06bbdb0b700e9ef1211eaeaacf3d
Author: Pxl <pxl...@qq.com>
AuthorDate: Tue Aug 20 11:56:55 2024 +0800

    [Bug](predicate) fix wrong result of AcceptNullPredicate (#39497)
    
    ## Proposed changes
    fix wrong result of AcceptNullPredicate
---
 be/src/olap/accept_null_predicate.h                |  35 +++----
 be/src/olap/shared_predicate.h                     |   4 +-
 .../data/nereids_arith_p0/topn/accept_null.out     |  13 +++
 .../nereids_arith_p0/topn/accept_null.groovy       | 110 +++++++++++++++++++++
 4 files changed, 140 insertions(+), 22 deletions(-)

diff --git a/be/src/olap/accept_null_predicate.h 
b/be/src/olap/accept_null_predicate.h
index 89d26e2684c..112101f46dd 100644
--- a/be/src/olap/accept_null_predicate.h
+++ b/be/src/olap/accept_null_predicate.h
@@ -34,6 +34,7 @@ namespace doris {
  *  but pass (set/return true) for NULL value rows.
  *
  * At parent, it's used for topn runtime predicate.
+ * Eg: original input indexs is '1,2,3,7,8,9' and value of index9 is null, we 
get nested predicate output index is '1,2,3', but we finally output '1,2,3,9'
 */
 class AcceptNullPredicate : public ColumnPredicate {
     ENABLE_FACTORY_CREATOR(AcceptNullPredicate);
@@ -44,8 +45,6 @@ public:
 
     PredicateType type() const override { return _nested->type(); }
 
-    void set_nested(ColumnPredicate* nested) { _nested.reset(nested); }
-
     Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
                     roaring::Roaring* roaring) const override {
         return _nested->evaluate(iterator, num_rows, roaring);
@@ -64,11 +63,14 @@ public:
     void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, 
uint16_t size,
                       bool* flags) const override {
         if (column.has_null()) {
+            std::vector<uint8_t> original_flags(size);
+            memcpy(original_flags.data(), flags, size);
+
             const auto& nullable_col = assert_cast<const 
vectorized::ColumnNullable&>(column);
             _nested->evaluate_and(nullable_col.get_nested_column(), sel, size, 
flags);
             const auto& nullmap = nullable_col.get_null_map_data();
             for (uint16_t i = 0; i < size; ++i) {
-                flags[i] |= nullmap[sel[i]];
+                flags[i] |= (original_flags[i] && nullmap[sel[i]]);
             }
         } else {
             _nested->evaluate_and(column, sel, size, flags);
@@ -77,20 +79,7 @@ public:
 
     void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, 
uint16_t size,
                      bool* flags) const override {
-        if (column.has_null()) {
-            const auto& nullable_col = assert_cast<const 
vectorized::ColumnNullable&>(column);
-            _nested->evaluate_or(nullable_col.get_nested_column(), sel, size, 
flags);
-
-            // call evaluate_or and set true for NULL rows
-            for (uint16_t i = 0; i < size; ++i) {
-                uint16_t idx = sel[i];
-                if (!flags[i] && nullable_col.is_null_at(idx)) {
-                    flags[i] = true;
-                }
-            }
-        } else {
-            _nested->evaluate_or(column, sel, size, flags);
-        }
+        DCHECK(false) << "should not reach here";
     }
 
     bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
@@ -158,6 +147,8 @@ private:
             }
             // create selected_flags
             uint16_t max_idx = sel[size - 1];
+            std::vector<uint16_t> old_sel(size);
+            memcpy(old_sel.data(), sel, sizeof(uint16_t) * size);
 
             const auto& nullable_col = assert_cast<const 
vectorized::ColumnNullable&>(column);
             // call nested predicate evaluate
@@ -165,14 +156,18 @@ private:
 
             // process NULL values
             if (new_size < size) {
-                std::vector<uint8_t> selected(max_idx + 1);
-                memcpy(selected.data(), 
nullable_col.get_null_map_data().data(),
-                       (max_idx + 1) * sizeof(bool));
+                std::vector<uint8_t> selected(max_idx + 1, 0);
+                const auto* nullmap = nullable_col.get_null_map_data().data();
                 // add rows selected by _nested->evaluate
                 for (uint16_t i = 0; i < new_size; ++i) {
                     uint16_t row_idx = sel[i];
                     selected[row_idx] = true;
                 }
+                // reset null from original data
+                for (uint16_t i = 0; i < size; ++i) {
+                    uint16_t row_idx = old_sel[i];
+                    selected[row_idx] |= nullmap[row_idx];
+                }
 
                 // recaculate new_size and sel array
                 new_size = 0;
diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h
index 41b18e99ba4..2a83f7ef434 100644
--- a/be/src/olap/shared_predicate.h
+++ b/be/src/olap/shared_predicate.h
@@ -167,9 +167,9 @@ private:
     std::string _debug_string() const override {
         std::shared_lock<std::shared_mutex> lock(_mtx);
         if (!_nested) {
-            return "shared_predicate<unknow>";
+            return "shared_predicate(unknow)";
         }
-        return "shared_predicate<" + _nested->debug_string() + ">";
+        return "shared_predicate(" + _nested->debug_string() + ")";
     }
 
     mutable std::shared_mutex _mtx;
diff --git a/regression-test/data/nereids_arith_p0/topn/accept_null.out 
b/regression-test/data/nereids_arith_p0/topn/accept_null.out
new file mode 100644
index 00000000000..605b9d0b1a9
--- /dev/null
+++ b/regression-test/data/nereids_arith_p0/topn/accept_null.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test --
+100    dd      100     0
+1000   dd      1000    0
+10000  dd      10000   0
+10001  dd      10001   0
+10002  dd      10002   0
+10003  dd      10003   0
+10004  dd      10004   0
+10005  dd      10005   0
+10006  dd      10006   0
+10007  dd      10007   0
+
diff --git a/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy 
b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy
new file mode 100644
index 00000000000..09713c76172
--- /dev/null
+++ b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite ("accept_null") {
+   sql """ drop table IF EXISTS detail_tmp;"""
+
+    sql """
+            CREATE TABLE `detail_tmp` (
+            `id` VARCHAR(512) NOT NULL,
+            `accident_no` VARCHAR(512) NULL,
+            `accident_type_name` VARCHAR(512) NULL
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "min_load_replica_num" = "-1",
+            "is_being_synced" = "false",
+            "storage_medium" = "hdd",
+            "storage_format" = "V2",
+            "inverted_index_storage_format" = "V1",
+            "enable_unique_key_merge_on_write" = "true",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false",
+            "group_commit_interval_ms" = "10000",
+            "group_commit_data_bytes" = "134217728",
+            "enable_mow_light_delete" = "false"
+            );
+        """
+
+    sql "insert into detail_tmp(id,accident_type_name,accident_no) select 
e1,'dd',e1 from (select 1 k1) as t lateral view explode_numbers(100000) tmp1 as 
e1;"
+    sql "delete from detail_tmp where accident_no <100;"
+
+   def tablets = sql_return_maparray """ show tablets from detail_tmp; """
+
+   // before full compaction, there are 7 rowsets in all tablets.
+   for (def tablet : tablets) {
+      int rowsetCount = 0
+      def (code, out, err) = curl("GET", tablet.CompactionStatus)
+      logger.info("Show tablets status: code=" + code + ", out=" + out + ", 
err=" + err)
+      assertEquals(code, 0)
+      def tabletJson = parseJson(out.trim())
+      assert tabletJson.rowsets instanceof List
+   }
+
+   // trigger full compactions for all tablets by table id in ${tableName}
+   def backendId_to_backendIP = [:]
+   def backendId_to_backendHttpPort = [:]
+   getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+   boolean disableAutoCompaction = true
+   for(int i=0;i<backendId_to_backendIP.keySet().size();i++){
+      backend_id = backendId_to_backendIP.keySet()[i]
+      def (code, out, err) = 
show_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id))
+      logger.info("Show config: code=" + code + ", out=" + out + ", err=" + 
err)
+      assertEquals(code, 0)
+      def configList = parseJson(out.trim())
+      assert configList instanceof List
+
+      for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "disable_auto_compaction") {
+               disableAutoCompaction = Boolean.parseBoolean(((List<String>) 
ele)[2])
+            }
+      }
+   }
+
+   for (def tablet : tablets) {
+      String tablet_id = tablet.TabletId
+      def tablet_info = sql_return_maparray """ show tablet ${tablet_id}; """
+      logger.info("tablet"+tablet_info)
+      def table_id = tablet_info[0].TableId
+      backend_id = tablet.BackendId
+      def times = 1
+      def code, out, err
+      do{
+            (code, out, err) = 
be_run_full_compaction_by_table_id(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), table_id)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", 
err=" + err)
+            ++times
+            sleep(2000)
+      } while (parseJson(out.trim()).status.toLowerCase()!="success" && 
times<=10)
+
+      def compactJson = parseJson(out.trim())
+      if (compactJson.status.toLowerCase() == "fail") {
+            assertEquals(disableAutoCompaction, false)
+            logger.info("Compaction was done automatically!")
+      }
+      if (disableAutoCompaction) {
+            assertEquals("success", compactJson.status.toLowerCase())
+      }
+   }
+
+    qt_test "select id,accident_type_name,accident_no,__DORIS_DELETE_SIGN__ 
From detail_tmp where accident_type_name = 'dd'  order by accident_no,id limit 
10;"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to