This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3585c7e216f [test](parquet)append parquet reader byte_array_decimal 
and rle_bool case (#26751)
3585c7e216f is described below

commit 3585c7e216fdde7447b4736fa3a5454cc732fcf9
Author: daidai <2017501...@qq.com>
AuthorDate: Tue Nov 14 15:05:10 2023 +0800

    [test](parquet)append parquet reader byte_array_decimal and rle_bool case 
(#26751)
---
 .../exec/format/parquet/parquet_column_convert.h   |  18 ++++-
 .../hive/scripts/create_preinstalled_table.hql     |  14 ++++
 .../parquet_decimal_bool/null.parquet              | Bin 0 -> 4298 bytes
 .../parquet_decimal_bool/null2.parquet             | Bin 0 -> 8572 bytes
 .../parquet_decimal_bool/test.parquet              | Bin 0 -> 621 bytes
 .../parquet_decimal_bool/test_copy_1.parquet       | Bin 0 -> 1701 bytes
 .../parquet_decimal_bool/test_copy_2.parquet       | Bin 0 -> 1719 bytes
 .../hive/test_hive_basic_type.out                  |  81 +++++++++++++++++++++
 .../hive/test_hive_basic_type.groovy               |  23 ++++++
 9 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h 
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 6cf3cfb6c50..8054f9b88e6 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -619,7 +619,23 @@ inline Status get_converter(tparquet::Type::type 
parquet_physical_type, Primitiv
                                                              
DecimalScaleParams::NO_SCALE>>();    \
                 }                                                              
                   \
             }                                                                  
                   \
-        } else if (tparquet::Type::INT32 == parquet_physical_type) {           
                   \
+        } else if (tparquet::Type::BYTE_ARRAY == parquet_physical_type) {      
                   \
+            
convert_params->init_decimal_converter<PRIMARY_TYPE>(dst_data_type);            
      \
+            using ValueCopyType = DECIMAL_TYPE::NativeType;                    
                   \
+            if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) {     
                   \
+                *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, 
ValueCopyType,        \
+                                                              
DecimalScaleParams::SCALE_UP>>();   \
+            } else if (scale_params.scale_type == 
DecimalScaleParams::SCALE_DOWN) {               \
+                *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, 
ValueCopyType,        \
+                                                              
DecimalScaleParams::SCALE_DOWN>>(); \
+            } else {                                                           
                   \
+                *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, 
ValueCopyType,        \
+                                                              
DecimalScaleParams::NO_SCALE>>();   \
+            }                                                                  
                   \
+                                                                               
                   \
+        }                                                                      
                   \
+                                                                               
                   \
+        else if (tparquet::Type::INT32 == parquet_physical_type) {             
                   \
             if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) {     
                   \
                 *converter = std::make_unique<NumberToDecimal<Int32, 
PRIMARY_TYPE, int64_t,       \
                                                               
DecimalScaleParams::SCALE_UP>>();   \
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 740bf2fa584..a6d5c212a12 100644
--- 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1762,6 +1762,20 @@ LOCATION
 
 msck repair table orc_decimal_table;
 
+CREATE TABLE `parquet_decimal_bool`(
+       decimals decimal(20,3),
+       bool_rle boolean
+)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/parquet_table/parquet_decimal_bool';
+
+msck repair table parquet_decimal_bool;
 
 show tables;
 
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet
new file mode 100644
index 00000000000..060bdf23bce
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet
new file mode 100644
index 00000000000..6279cdd873f
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet
new file mode 100644
index 00000000000..dbee42cece3
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet
new file mode 100644
index 00000000000..5ca4106056a
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet
new file mode 100644
index 00000000000..e1463724f4f
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet
 differ
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out 
b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 0398050f02b..94de65a4979 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -181,3 +181,84 @@ test       DATETIME(6)     Yes     true    \N
 \N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
        test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 \N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
        test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
 
+-- !parquet --
+219
+
+-- !parquet1 --
+-7871.416      false
+-7871.416      false
+-7871.416      false
+-7871.416      true
+-7871.416      true
+-7871.416      true
+-7871.416      true
+
+-- !parquet2 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
+-- !parquet3 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
+-- !parquet4 --
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+-2633.645
+
+-- !parquet5 --
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+-7871.416
+
+-- !parquet7 --
+true
+true
+true
+true
+true
+true
+true
+
+-- !parquet8 --
+true
+true
+true
+true
+true
+true
+true
+
+-- !parquet9 --
+116
+
+-- !parquet10 --
+123
+
+-- !parquet11 --
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+123.456
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
index bf58eb163c7..84720216fe0 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy
@@ -108,6 +108,29 @@ suite("test_hive_basic_type", 
"external_docker,hive,external_docker_hive,p0,exte
         // orc_all_types_t predicate test
         order_qt_41 """select * from 
${catalog_name}.${ex_db_name}.orc_all_types_t where t_int = 3;"""
 
+        // test parquet byte_array_decimal and rle_bool
+        order_qt_parquet """ select count(*) from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool """
+        order_qt_parquet1 """ select * from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals is not null and  bool_rle is not null  order by 
decimals,bool_rle limit 7 """ 
+        order_qt_parquet2 """ select decimals from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals is not null and decimals > 1  order by decimals 
limit 7 """ 
+        order_qt_parquet3 """ select decimals from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals = 123.456  order by decimals limit 7 """ 
+        order_qt_parquet4 """ select decimals from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals != -7871.416 and decimals is not null order by 
decimals limit 7 """ 
+    
+        order_qt_parquet5 """ select decimals from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals is not null and decimals < 0  order by decimals 
limit 7 """ 
+        
+        order_qt_parquet7 """ select bool_rle from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where bool_rle is not null and bool_rle = 1 limit 7 """ 
+        order_qt_parquet8 """ select bool_rle from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where bool_rle is not null and bool_rle = 1 limit 7 """ 
+        order_qt_parquet9 """ select count(bool_rle) from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool; """ 
+        order_qt_parquet10 """ select count(decimals) from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool; """ 
+        order_qt_parquet11 """ select decimals from 
${catalog_name}.${ex_db_name}.parquet_decimal_bool 
+                where decimals is not null and decimals > 1  order by decimals 
limit 7 """ 
+
         //sql """drop catalog if exists ${catalog_name} """
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to