This is an automated email from the ASF dual-hosted git repository.

zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c668d01a22 [Bug](distinct) fix distinct function with over return 
error result (#51875)
6c668d01a22 is described below

commit 6c668d01a22da61a98eb0d3bd8cc17351a7cd4c2
Author: zhangstar333 <[email protected]>
AuthorDate: Wed Jun 25 19:38:35 2025 +0800

    [Bug](distinct) fix distinct function with over return error result (#51875)
    
    ### What problem does this PR solve?
    Problem Summary:
    Before this fix, the result was incorrect: after the result rows are
    inserted, all state of the nested function should be reset, so that
    the result of the next row is not affected by the previous one.
    Example of the incorrect output before the fix:
    ```
    mysql> select id, v1, multi_distinct_group_concat(v2) over() from multi;
    +------+------+----------------------------------------+
    | id   | v1   | multi_distinct_group_concat(v2) over() |
    +------+------+----------------------------------------+
    |    3 |    1 | a                                      |
    |    1 |    1 | a,a                                    |
    |    1 |    1 | a,a,a                                  |
    |    2 |    1 | a,a,a,a                                |
    +------+------+----------------------------------------+
    4 rows in set (0.21 sec)
    
    mysql> select id, v1, multi_distinct_sum(v1) over() from multi;
    +------+------+-------------------------------+
    | id   | v1   | multi_distinct_sum(v1) over() |
    +------+------+-------------------------------+
    |    1 |    1 |                             1 |
    |    1 |    1 |                             2 |
    |    2 |    1 |                             3 |
    |    3 |    1 |                             4 |
    +------+------+-------------------------------+
    4 rows in set (0.06 sec)
    ```
---
 .../aggregate_function_distinct.h                  |  13 ++++++++++++
 .../test_aggregate_window_functions.out            | Bin 21134 -> 21133 bytes
 .../sql_functions/window_functions/test_sum.out    | Bin 143 -> 242 bytes
 .../sql_functions/window_functions/test_sum.groovy |  22 +++++++++++++++++++++
 4 files changed, 35 insertions(+)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h 
b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
index ac49f8213c6..616d8a1e9a9 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
@@ -65,6 +65,8 @@ struct AggregateFunctionDistinctSingleNumericData {
     using Self = AggregateFunctionDistinctSingleNumericData<T, stable>;
     Container data;
 
+    void clear() { data.clear(); }
+
     void add(const IColumn** columns, size_t /* columns_num */, size_t 
row_num, Arena*) {
         const auto& vec =
                 assert_cast<const ColumnVector<T>&, 
TypeCheckOnRelease::DISABLE>(*columns[0])
@@ -135,6 +137,8 @@ struct AggregateFunctionDistinctGenericData {
     using Self = AggregateFunctionDistinctGenericData;
     Container data;
 
+    void clear() { data.clear(); }
+
     void merge(const Self& rhs, Arena* arena) {
         DCHECK(!stable);
         if constexpr (!stable) {
@@ -322,6 +326,15 @@ public:
         nested_func->add_batch_single_place(arguments[0]->size(), 
get_nested_place(place),
                                             arguments_raw.data(), &arena);
         nested_func->insert_result_into(get_nested_place(place), to);
+        // for distinct agg function, the real calculate is 
add_batch_single_place at last step of insert_result_into function.
+        // but with distinct agg and over() window function together, the 
result will be inserted into many times with different rows
+        // so we need to clear the data, thus not to affect the next 
insert_result_into
+        this->data(place).clear();
+    }
+
+    void reset(AggregateDataPtr place) const override {
+        this->data(place).clear();
+        nested_func->reset(get_nested_place(place));
     }
 
     size_t size_of_data() const override { return prefix_size + 
nested_func->size_of_data(); }
diff --git 
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
index ae6d1ad35a2..006cea921b6 100644
Binary files 
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 and 
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 differ
diff --git 
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out 
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out
index 9185f64fa6e..84f4bccb2d1 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out and 
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out 
differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
index 3611400568d..e61f586181a 100644
--- 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
@@ -21,5 +21,27 @@ suite("test_sum") {
                       (partition by k1 order by k3 range between current row 
and unbounded following) as w 
                   from test_query_db.test order by k1, w
               """
+
+    sql "create database if not exists multi_db"
+    sql "use multi_db"
+    sql "DROP TABLE IF EXISTS multi"
+    sql """
+        CREATE TABLE multi (
+            id int,
+            v1 int,
+            v2 varchar
+            ) ENGINE = OLAP
+            DUPLICATE KEY(id) COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(id) BUCKETS 2
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );
+        """ 
+    sql """
+        insert into multi values (1, 1, 'a'),(1, 1, 'a'), (2, 1, 'a'), (3, 1, 
'a');
+        """ 
+    qt_sql_window_muti1 """   select multi_distinct_group_concat(v2) over() 
from multi; """
+    qt_sql_window_muti2 """   select multi_distinct_sum(v1) over() from multi; 
"""
+    qt_sql_window_muti3 """   select multi_distinct_count(v1) over() from 
multi; """
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to