This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to tag 2.0.15.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4a6b26e1921d72ca9c13c593485cc2261a197a37 Author: Xin Liao <liaoxin...@126.com> AuthorDate: Mon Oct 21 11:29:17 2024 +0800 [fix](agg) Aggregating string types with null values may result in incorrect result (#42067) Aggregating string types with null values may result in incorrect result because using the replace_column_data function can cause incorrect offsets in the column. A reproducible caseļ¼ ``` CREATE TABLE `test_scan_keys_with_bool_type` ( `col1` tinyint NOT NULL, `col2` int NOT NULL, `col3` tinyint NOT NULL, `col5` boolean REPLACE NOT NULL, `col4` datetime(2) REPLACE NOT NULL, `col6` double REPLACE_IF_NOT_NULL NULL, `col7` varchar(100) REPLACE_IF_NOT_NULL NULL ) ENGINE=OLAP AGGREGATE KEY(`col1`, `col2`, `col3`) DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true" ); insert into test_scan_keys_with_bool_type values ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), ( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ), ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ); insert into test_scan_keys_with_bool_type values ( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "doris" ); MySQL [test]> select * from test_scan_keys_with_bool_type; +------+------+------+------+------------------------+---------------------+-------+ | col1 | col2 | col3 | col5 | col4 | col6 | col7 | +------+------+------+------+------------------------+---------------------+-------+ | -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi | | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL | | -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hidor | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | +------+------+------+------+------------------------+---------------------+-------+ 4 rows in set (0.057 sec) MySQL [test]> set skip_storage_engine_merge = true; select * from test_scan_keys_with_bool_type; +------+------+------+------+------------------------+---------------------+-------+ | col1 | col2 | col3 | col5 | col4 | col6 | col7 | +------+------+------+------+------------------------+---------------------+-------+ | -100 | 0 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi | | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | | -100 | 1 | 1 | 1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | doris | +------+------+------+------+------------------------+---------------------+-------+ 4 rows in set (0.023 sec) ``` https://github.com/apache/doris/pull/33493 By supporting variant type aggregation, this issue has been resolved.So versions after 2.1 do not have this issue. --- be/src/vec/olap/block_reader.cpp | 4 +-- .../aggregate/test_aggregate_table.out | 6 ++++ .../aggregate/test_aggregate_table.groovy | 32 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp index d69efa181e8..5ff2890ab37 100644 --- a/be/src/vec/olap/block_reader.cpp +++ b/be/src/vec/olap/block_reader.cpp @@ -470,10 +470,10 @@ size_t BlockReader::_copy_agg_data() { auto& dst_column = _stored_data_columns[idx]; if (_stored_has_variable_length_tag[idx]) { //variable length type should replace ordered + dst_column->clear(); for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; - dst_column->replace_column_data(*ref.block->get_by_position(idx).column, - ref.row_pos, i); + dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos); } } else { for (auto& it : _temp_ref_map) { diff --git a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out index 1a7ed71eb2e..8da75c78275 100644 --- a/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out +++ b/regression-test/data/data_model_p0/aggregate/test_aggregate_table.out @@ -46,3 +46,9 @@ datetimev2_value_min_2 datetime(6) Yes false \N MIN datetimev2_value_replace_2 datetime(6) Yes false \N REPLACE datetimev2_value_replace_if_not_null_2 datetime(6) Yes false \N REPLACE_IF_NOT_NULL +-- !string_agg_table_with_null -- +-100 0 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 hi +-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 1 1 true 2024-02-16T04:37:37 1.399962421904282E9 doris +-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7 NULL + diff --git a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy index f7c4d8266ae..9d0322df17d 100644 --- a/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy +++ b/regression-test/suites/data_model_p0/aggregate/test_aggregate_table.groovy @@ -99,4 +99,36 @@ suite("test_aggregate_table") { qt_desc_date_table """desc date_agg""" sql """DROP TABLE date_agg""" + sql """DROP TABLE IF EXISTS test_string_agg_with_null""" + sql """ + CREATE TABLE `test_string_agg_with_null` ( + `col1` tinyint NOT NULL, + `col2` int NOT NULL, + `col3` tinyint NOT NULL, + `col5` boolean REPLACE NOT NULL, + `col4` datetime(2) REPLACE NOT NULL, + `col6` double REPLACE_IF_NOT_NULL NULL, + `col7` varchar(100) REPLACE_IF_NOT_NULL NULL + ) ENGINE=OLAP + AGGREGATE KEY(`col1`, `col2`, `col3`) + DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ insert into test_string_agg_with_null values + ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), + ( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ), + ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , "NULL" ); + """ + + sql """ insert into test_string_agg_with_null values + ( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , 1399962421.904282 , "doris" ); + """ + + qt_string_agg_table_with_null """ select * from test_string_agg_with_null """ + sql """DROP TABLE test_string_agg_with_null""" + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org