This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to tag 2.0.15.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4a6b26e1921d72ca9c13c593485cc2261a197a37
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Mon Oct 21 11:29:17 2024 +0800

    [fix](agg) Aggregating string types with null values may result in 
incorrect result (#42067)
    
    Aggregating string types with null values may result in incorrect result
    because using the replace_column_data function can cause incorrect
    offsets in the column.
    
    A reproducible caseļ¼š
    ```
    CREATE TABLE `test_scan_keys_with_bool_type` (
        `col1` tinyint NOT NULL,
        `col2` int NOT NULL,
        `col3` tinyint NOT NULL,
        `col5` boolean REPLACE NOT NULL,
        `col4` datetime(2) REPLACE NOT NULL,
        `col6` double REPLACE_IF_NOT_NULL NULL,
        `col7` varchar(100) REPLACE_IF_NOT_NULL NULL
    ) ENGINE=OLAP
    AGGREGATE KEY(`col1`, `col2`, `col3`)
    DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
    PROPERTIES (
        "replication_allocation" = "tag.location.default: 1",
        "disable_auto_compaction" = "true"
    );
    
    insert into test_scan_keys_with_bool_type values
    ( -100 ,    1 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 
, NULL ),
    ( -100 ,    0 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 
, "hi" ),
    ( -100 ,    1 ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   23423423.0324234 
, NULL );
    
     insert into test_scan_keys_with_bool_type values
    ( -100 ,    1 ,  1 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , 
"doris" );
    
    MySQL [test]> select * from test_scan_keys_with_bool_type;
    
+------+------+------+------+------------------------+---------------------+-------+
    | col1 | col2 | col3 | col5 | col4                   | col6                
| col7  |
    
+------+------+------+------+------------------------+---------------------+-------+
    | -100 |    0 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| hi    |
    | -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| NULL  |
    | -100 |    1 |    1 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| hidor |
    | -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |    23423423.0324234 
| NULL  |
    
+------+------+------+------+------------------------+---------------------+-------+
    4 rows in set (0.057 sec)
    
    MySQL [test]> set skip_storage_engine_merge = true; select * from 
test_scan_keys_with_bool_type;
    
+------+------+------+------+------------------------+---------------------+-------+
    | col1 | col2 | col3 | col5 | col4                   | col6                
| col7  |
    
+------+------+------+------+------------------------+---------------------+-------+
    | -100 |    0 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| hi    |
    | -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| NULL  |
    | -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |    23423423.0324234 
| NULL  |
    | -100 |    1 |    1 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 
| doris |
    
+------+------+------+------+------------------------+---------------------+-------+
    4 rows in set (0.023 sec)
    ```
    https://github.com/apache/doris/pull/33493 By supporting variant type
    aggregation, this issue has been resolved.So versions after 2.1 do not
    have this issue.
---
 be/src/vec/olap/block_reader.cpp                   |  4 +--
 .../aggregate/test_aggregate_table.out             |  6 ++++
 .../aggregate/test_aggregate_table.groovy          | 32 ++++++++++++++++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
index d69efa181e8..5ff2890ab37 100644
--- a/be/src/vec/olap/block_reader.cpp
+++ b/be/src/vec/olap/block_reader.cpp
@@ -470,10 +470,10 @@ size_t BlockReader::_copy_agg_data() {
         auto& dst_column = _stored_data_columns[idx];
         if (_stored_has_variable_length_tag[idx]) {
             //variable length type should replace ordered
+            dst_column->clear();
             for (size_t i = 0; i < copy_size; i++) {
                 auto& ref = _stored_row_ref[i];
-                
dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
-                                                ref.row_pos, i);
+                
dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
             }
         } else {
             for (auto& it : _temp_ref_map) {
diff --git 
a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out 
b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
index 1a7ed71eb2e..8da75c78275 100644
--- a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
+++ b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out
@@ -46,3 +46,9 @@ datetimev2_value_min_2        datetime(6)     Yes     false   
\N      MIN
 datetimev2_value_replace_2     datetime(6)     Yes     false   \N      REPLACE
 datetimev2_value_replace_if_not_null_2 datetime(6)     Yes     false   \N      
REPLACE_IF_NOT_NULL
 
+-- !string_agg_table_with_null --
+-100   0       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
hi
+-100   1       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   1       1       true    2024-02-16T04:37:37     1.399962421904282E9     
doris
+-100   1       92      true    2024-02-16T04:37:37     2.34234230324234E7      
NULL
+
diff --git 
a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy 
b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
index f7c4d8266ae..9d0322df17d 100644
--- a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
+++ b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy
@@ -99,4 +99,36 @@ suite("test_aggregate_table") {
     qt_desc_date_table """desc date_agg"""
     sql """DROP TABLE date_agg"""
 
+    sql """DROP TABLE IF EXISTS test_string_agg_with_null"""
+    sql """ 
+        CREATE TABLE `test_string_agg_with_null` (
+            `col1` tinyint NOT NULL,
+            `col2` int NOT NULL,
+            `col3` tinyint NOT NULL,
+            `col5` boolean REPLACE NOT NULL,
+            `col4` datetime(2) REPLACE NOT NULL,
+            `col6` double REPLACE_IF_NOT_NULL NULL,
+            `col7` varchar(100) REPLACE_IF_NOT_NULL NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`col1`, `col2`, `col3`)
+        DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+        ); 
+    """
+
+    sql """ insert into test_string_agg_with_null values
+        ( -100 ,    1 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , NULL ),
+        ( -100 ,    0 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , "hi" ),
+        ( -100 ,    1 ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   
23423423.0324234 , "NULL" );
+    """
+
+    sql """ insert into test_string_agg_with_null values
+        ( -100 ,    1 ,  1 ,    1 , '2024-02-16 04:37:37.00' , 
1399962421.904282 , "doris" );
+        """
+    
+    qt_string_agg_table_with_null """ select * from test_string_agg_with_null 
"""
+    sql """DROP TABLE test_string_agg_with_null"""
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to