This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b998365ad167c5b9a1f2f98f511b0939bfa64241
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Fri Aug 23 17:36:27 2024 +0800

    [fix](agg function) incorrect result of map agg (#39743)
    
    ## Proposed changes
    
    In the function `void add(const Field& key_, const Field& value)`, it
    should not return while the key exists in the set.
---
 .../aggregate_functions/aggregate_function_map.h   | 15 +++++------
 .../data/query_p0/aggregate/map_agg.out            |  5 ++++
 .../suites/query_p0/aggregate/map_agg.groovy       | 31 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h 
b/be/src/vec/aggregate_functions/aggregate_function_map.h
index ca962bd3207..83e96a8d236 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_map.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_map.h
@@ -18,7 +18,6 @@
 #pragma once
 
 #include <parallel_hashmap/phmap.h>
-#include <string.h>
 
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
@@ -92,7 +91,7 @@ struct AggregateFunctionMapAggData {
             }
 
             if (UNLIKELY(_map.find(key) != _map.end())) {
-                return;
+                continue;
             }
 
             key.data = _arena.insert(key.data, key.size);
@@ -161,9 +160,7 @@ struct AggregateFunctionMapAggData {
         StringRef key;
         for (size_t i = 0; i < size; i++) {
             read_binary(key, buf);
-            if (_map.find(key) != _map.cend()) {
-                continue;
-            }
+            DCHECK(_map.find(key) == _map.cend());
             key.data = _arena.insert(key.data, key.size);
             assert_cast<KeyColumnType&, 
TypeCheckOnRelease::DISABLE>(*_key_column)
                     .insert_data(key.data, key.size);
@@ -208,9 +205,9 @@ public:
     void add(AggregateDataPtr __restrict place, const IColumn** columns, 
ssize_t row_num,
              Arena* arena) const override {
         if (columns[0]->is_nullable()) {
-            auto& nullable_col =
+            const auto& nullable_col =
                     assert_cast<const ColumnNullable&, 
TypeCheckOnRelease::DISABLE>(*columns[0]);
-            auto& nullable_map = nullable_col.get_null_map_data();
+            const auto& nullable_map = nullable_col.get_null_map_data();
             if (nullable_map[row_num]) {
                 return;
             }
@@ -267,7 +264,7 @@ public:
 
     void deserialize_from_column(AggregateDataPtr places, const IColumn& 
column, Arena* arena,
                                  size_t num_rows) const override {
-        auto& col = assert_cast<const ColumnMap&>(column);
+        const auto& col = assert_cast<const ColumnMap&>(column);
         auto* data = &(this->data(places));
         for (size_t i = 0; i != num_rows; ++i) {
             auto map = doris::vectorized::get<Map>(col[i]);
@@ -298,7 +295,7 @@ public:
                                                  Arena* arena) const override {
         DCHECK(end <= column.size() && begin <= end)
                 << ", begin:" << begin << ", end:" << end << ", 
column.size():" << column.size();
-        auto& col = assert_cast<const ColumnMap&>(column);
+        const auto& col = assert_cast<const ColumnMap&>(column);
         for (size_t i = begin; i <= end; ++i) {
             auto map = doris::vectorized::get<Map>(col[i]);
             this->data(place).add(map[0], map[1]);
diff --git a/regression-test/data/query_p0/aggregate/map_agg.out 
b/regression-test/data/query_p0/aggregate/map_agg.out
index c7db1dcaeae..186e888b57f 100644
--- a/regression-test/data/query_p0/aggregate/map_agg.out
+++ b/regression-test/data/query_p0/aggregate/map_agg.out
@@ -43,3 +43,8 @@ V5_3
 -- !multi --
 1      2
 
+-- !test_dumplicate --
+1      \N
+2      bddd
+3      \N
+
diff --git a/regression-test/suites/query_p0/aggregate/map_agg.groovy 
b/regression-test/suites/query_p0/aggregate/map_agg.groovy
index 351c077df10..385a40b065f 100644
--- a/regression-test/suites/query_p0/aggregate/map_agg.groovy
+++ b/regression-test/suites/query_p0/aggregate/map_agg.groovy
@@ -322,4 +322,35 @@ suite("map_agg") {
                , day
         ) t order by 1, 2;
     """
+
+    sql "DROP TABLE IF EXISTS `test_map_agg_2`;"
+    sql """
+        CREATE TABLE `test_map_agg_2` (
+        `k1` int NULL,
+        `k2` int NULL,
+        `v1` text NULL,
+        `v2` text NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`, `k2`)
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 4
+        PROPERTIES ( 'replication_num' = '1');
+    """
+
+    sql """
+        insert into `test_map_agg_2` values
+            (    3 ,    1 , 'k'    , 'j'    ),
+            (    3 ,    2 , 'a'    , 'a3'   ),
+            (    5 ,    2 , 'a'    , 'a5'   ),
+            (    1 ,    1 , 'ee'   , 'nn'   ),
+            (    1 ,    1 , 'a'    , 'b'    ),
+            (    1 ,    2 , 'a'    , 'b'    ),
+            (    1 ,    3 , 'c'    , 'c'    ),
+            (    2 ,    1 , 'e'    , 'f'    ),
+            (    2 ,    2 , 'a'    , 'a2'   ),
+            (    4 ,    2 , 'b'    , 'bddd' ),
+            (    4 ,    2 , 'a'    , 'a4'   );
+    """
+
+    sql "set experimental_ignore_storage_data_distribution = 0;"
+    qt_test_dumplicate "select k2, m['b'] from (select k2, map_agg(v1, v2) m 
from `test_map_agg_2` group  by k2) a order by k2;"
  }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to