This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e4149c6e4c5 [Fix](parquet-reader) Fix null map issue in parquet reader. (#27777) e4149c6e4c5 is described below commit e4149c6e4c5e01c57d00d3ef2748c8dcf751121e Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Thu Nov 30 13:55:37 2023 +0800 [Fix](parquet-reader) Fix null map issue in parquet reader. (#27777) Fix a null map issue in the parquet reader that causes incorrect results for aggregate functions such as `min()` and `max()`. To avoid copying, the null map is shared between the parquet-converted src column and the dst column. This is tricky: the fix deliberately calls the mutable function `doris_nullable_column->get_null_map_column_ptr()`, which sets `_need_update_has_null = true`. This is necessary because some operations, such as agg, call `has_null()`, which sets `_need_update_has_null = false`. --- .../exec/format/parquet/parquet_column_convert.cpp | 6 +++++- .../hive/scripts/create_preinstalled_table.hql | 13 +++++++++++++ ...f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet | Bin 0 -> 2003651 bytes .../data/external_table_p0/hive/test_hive_parquet.out | 12 ++++++++++++ .../external_table_p0/hive/test_hive_parquet.groovy | 16 ++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp index 34b6da3e571..28ba92b8680 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp @@ -71,7 +71,11 @@ ColumnPtr get_column(tparquet::Type::type parquet_physical_type, PrimitiveType s } if (*need_convert && doris_type->is_nullable()) { - auto doris_nullable_column = static_cast<const ColumnNullable*>(doris_column.get()); + // In order to share null map between parquet converted src column and dst column to avoid copying. 
It is very tricky that will + // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. + // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. + auto doris_nullable_column = + const_cast<ColumnNullable*>(static_cast<const ColumnNullable*>(doris_column.get())); ans_column = ColumnNullable::create(ans_column, doris_nullable_column->get_null_map_column_ptr()); } diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index 4e80d7466d2..dcaaa321e78 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -1788,6 +1788,19 @@ LOCATION msck repair table partition_table; +CREATE TABLE `parquet_decimal90_table`( + `decimal_col` decimal(9,0)) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '/user/doris/preinstalled_data/parquet_table/parquet_decimal90_table'; + +msck repair table parquet_decimal90_table; + show tables; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet new file mode 100644 index 00000000000..9f28d71e504 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet 
differ diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet.out b/regression-test/data/external_table_p0/hive/test_hive_parquet.out index f444323407e..a54a25520da 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_parquet.out +++ b/regression-test/data/external_table_p0/hive/test_hive_parquet.out @@ -10133,3 +10133,15 @@ rus moscow 996 us chicago 1995 us washington 1999 +-- !q21_max -- +-115249949 + +-- !q21_min -- +-999999917 + +-- !q21_sum -- +-247549496714217 + +-- !q21_avg -- +-495084140.9042 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy index 2daac80d71e..c60e1a4f0a6 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy @@ -139,6 +139,21 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv """ } + def q21 = { + qt_q21_max """ + select max(decimal_col) from parquet_decimal90_table; + """ + qt_q21_min """ + select min(decimal_col) from parquet_decimal90_table; + """ + qt_q21_sum """ + select sum(decimal_col) from parquet_decimal90_table; + """ + qt_q21_avg """ + select avg(decimal_col) from parquet_decimal90_table; + """ + } + String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled != null && enabled.equalsIgnoreCase("true")) { try { @@ -175,6 +190,7 @@ suite("test_hive_parquet", "p0,external,hive,external_docker,external_docker_hiv q18() q19() q20() + q21() sql """explain physical plan select l_partkey from partition_table where (nation != 'cn' or city !='beijing') and (l_quantity > 28 or l_extendedprice > 30000) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org