This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e4149c6e4c5 [Fix](parquet-reader) Fix null map issue in parquet 
reader. (#27777)
e4149c6e4c5 is described below

commit e4149c6e4c5e01c57d00d3ef2748c8dcf751121e
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Thu Nov 30 13:55:37 2023 +0800

    [Fix](parquet-reader) Fix null map issue in parquet reader. (#27777)
    
    Fix a null map issue in the parquet reader that causes incorrect results 
for functions such as `min()` and `max()`.
    
    The null map is shared between the parquet converted src column and the 
dst column to avoid copying. To make this work, the code deliberately (and 
somewhat trickily) calls the mutable function 
`doris_nullable_column->get_null_map_column_ptr()`, which sets 
`_need_update_has_null = true`. This is needed because some operations, such 
as agg, call `has_null()`, which sets `_need_update_has_null = false`.
---
 .../exec/format/parquet/parquet_column_convert.cpp    |   6 +++++-
 .../hive/scripts/create_preinstalled_table.hql        |  13 +++++++++++++
 ...f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet | Bin 0 -> 2003651 bytes
 .../data/external_table_p0/hive/test_hive_parquet.out |  12 ++++++++++++
 .../external_table_p0/hive/test_hive_parquet.groovy   |  16 ++++++++++++++++
 5 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp 
b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 34b6da3e571..28ba92b8680 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -71,7 +71,11 @@ ColumnPtr get_column(tparquet::Type::type 
parquet_physical_type, PrimitiveType s
     }
 
     if (*need_convert && doris_type->is_nullable()) {
-        auto doris_nullable_column = static_cast<const 
ColumnNullable*>(doris_column.get());
+        // The null map is shared between the parquet converted src column and the dst column to avoid copying.
+        // To make this work, we deliberately call the mutable function `doris_nullable_column->get_null_map_column_ptr()`,
+        // which sets `_need_update_has_null = true`, because some operations such as agg call `has_null()`, which sets `_need_update_has_null = false`.
+        auto doris_nullable_column =
+                const_cast<ColumnNullable*>(static_cast<const 
ColumnNullable*>(doris_column.get()));
         ans_column = ColumnNullable::create(ans_column,
                                             
doris_nullable_column->get_null_map_column_ptr());
     }
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 4e80d7466d2..dcaaa321e78 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1788,6 +1788,19 @@ LOCATION
 
 msck repair table partition_table;
 
+CREATE TABLE `parquet_decimal90_table`(
+  `decimal_col` decimal(9,0))
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/parquet_table/parquet_decimal90_table';
+
+msck repair table parquet_decimal90_table;
+
 show tables;
 
 
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
new file mode 100644
index 00000000000..9f28d71e504
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
 differ
diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet.out 
b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
index f444323407e..a54a25520da 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_parquet.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
@@ -10133,3 +10133,15 @@ rus    moscow  996
 us     chicago 1995
 us     washington      1999
 
+-- !q21_max --
+-115249949
+
+-- !q21_min --
+-999999917
+
+-- !q21_sum --
+-247549496714217
+
+-- !q21_avg --
+-495084140.9042
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
index 2daac80d71e..c60e1a4f0a6 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
@@ -139,6 +139,21 @@ suite("test_hive_parquet", 
"p0,external,hive,external_docker,external_docker_hiv
     """
     }
 
+    def q21 = {
+        qt_q21_max """
+        select max(decimal_col) from parquet_decimal90_table;
+        """
+        qt_q21_min """
+        select min(decimal_col) from parquet_decimal90_table;
+        """
+        qt_q21_sum """
+        select sum(decimal_col) from parquet_decimal90_table;
+        """
+        qt_q21_avg """
+        select avg(decimal_col) from parquet_decimal90_table;
+        """
+    }
+
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled != null && enabled.equalsIgnoreCase("true")) {
         try {
@@ -175,6 +190,7 @@ suite("test_hive_parquet", 
"p0,external,hive,external_docker,external_docker_hiv
             q18()
             q19()
             q20()
+            q21()
 
             sql """explain physical plan select l_partkey from partition_table
                 where (nation != 'cn' or city !='beijing') and (l_quantity > 
28 or l_extendedprice > 30000)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to