This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit b998365ad167c5b9a1f2f98f511b0939bfa64241 Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Fri Aug 23 17:36:27 2024 +0800 [fix](agg function) incorrect result of map agg (#39743) ## Proposed changes In the function `void add(const Field& key_, const Field& value)`, it should not return while the key exists in the set. --- .../aggregate_functions/aggregate_function_map.h | 15 +++++------ .../data/query_p0/aggregate/map_agg.out | 5 ++++ .../suites/query_p0/aggregate/map_agg.groovy | 31 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index ca962bd3207..83e96a8d236 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -18,7 +18,6 @@ #pragma once #include <parallel_hashmap/phmap.h> -#include <string.h> #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" @@ -92,7 +91,7 @@ struct AggregateFunctionMapAggData { } if (UNLIKELY(_map.find(key) != _map.end())) { - return; + continue; } key.data = _arena.insert(key.data, key.size); @@ -161,9 +160,7 @@ struct AggregateFunctionMapAggData { StringRef key; for (size_t i = 0; i < size; i++) { read_binary(key, buf); - if (_map.find(key) != _map.cend()) { - continue; - } + DCHECK(_map.find(key) == _map.cend()); key.data = _arena.insert(key.data, key.size); assert_cast<KeyColumnType&, TypeCheckOnRelease::DISABLE>(*_key_column) .insert_data(key.data, key.size); @@ -208,9 +205,9 @@ public: void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { if (columns[0]->is_nullable()) { - auto& nullable_col = + const auto& nullable_col = assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*columns[0]); - auto& nullable_map = nullable_col.get_null_map_data(); + const auto& nullable_map = nullable_col.get_null_map_data(); if (nullable_map[row_num]) { return; } @@ -267,7 +264,7 @@ public: void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, size_t num_rows) const override { - auto& col = assert_cast<const ColumnMap&>(column); + const auto& col = assert_cast<const ColumnMap&>(column); auto* data = &(this->data(places)); for (size_t i = 0; i != num_rows; ++i) { auto map = doris::vectorized::get<Map>(col[i]); @@ -298,7 +295,7 @@ public: Arena* arena) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); - auto& col = assert_cast<const ColumnMap&>(column); + const auto& col = assert_cast<const ColumnMap&>(column); for (size_t i = begin; i <= end; ++i) { auto map = doris::vectorized::get<Map>(col[i]); this->data(place).add(map[0], map[1]); diff --git a/regression-test/data/query_p0/aggregate/map_agg.out b/regression-test/data/query_p0/aggregate/map_agg.out index c7db1dcaeae..186e888b57f 100644 --- a/regression-test/data/query_p0/aggregate/map_agg.out +++ b/regression-test/data/query_p0/aggregate/map_agg.out @@ -43,3 +43,8 @@ V5_3 -- !multi -- 1 2 +-- !test_dumplicate -- +1 \N +2 bddd +3 \N + diff --git a/regression-test/suites/query_p0/aggregate/map_agg.groovy b/regression-test/suites/query_p0/aggregate/map_agg.groovy index 351c077df10..385a40b065f 100644 --- a/regression-test/suites/query_p0/aggregate/map_agg.groovy +++ b/regression-test/suites/query_p0/aggregate/map_agg.groovy @@ -322,4 +322,35 @@ suite("map_agg") { , day ) t order by 1, 2; """ + + sql "DROP TABLE IF EXISTS `test_map_agg_2`;" + sql """ + CREATE TABLE `test_map_agg_2` ( + `k1` int NULL, + `k2` int NULL, + `v1` text NULL, + `v2` text NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 4 + PROPERTIES ( 'replication_num' = '1'); + """ + + sql """ + insert into `test_map_agg_2` values + ( 3 , 1 , 'k' , 'j' ), + ( 3 , 2 , 'a' , 'a3' ), + ( 5 , 2 , 'a' , 'a5' ), + ( 1 , 1 , 'ee' , 'nn' ), + ( 1 , 1 , 'a' , 'b' ), + ( 1 , 2 , 'a' , 'b' ), + ( 1 , 3 , 'c' , 'c' ), + ( 2 , 1 , 'e' , 'f' ), + ( 2 , 2 , 'a' , 'a2' ), + ( 4 , 2 , 'b' , 'bddd' ), + ( 4 , 2 , 'a' , 'a4' ); + """ + + sql "set experimental_ignore_storage_data_distribution = 0;" + qt_test_dumplicate "select k2, m['b'] from (select k2, map_agg(v1, v2) m from `test_map_agg_2` group by k2) a order by k2;" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org