This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3585c7e216f [test](parquet)append parquet reader byte_array_decimal and rle_bool case (#26751) 3585c7e216f is described below commit 3585c7e216fdde7447b4736fa3a5454cc732fcf9 Author: daidai <2017501...@qq.com> AuthorDate: Tue Nov 14 15:05:10 2023 +0800 [test](parquet)append parquet reader byte_array_decimal and rle_bool case (#26751) --- .../exec/format/parquet/parquet_column_convert.h | 18 ++++- .../hive/scripts/create_preinstalled_table.hql | 14 ++++ .../parquet_decimal_bool/null.parquet | Bin 0 -> 4298 bytes .../parquet_decimal_bool/null2.parquet | Bin 0 -> 8572 bytes .../parquet_decimal_bool/test.parquet | Bin 0 -> 621 bytes .../parquet_decimal_bool/test_copy_1.parquet | Bin 0 -> 1701 bytes .../parquet_decimal_bool/test_copy_2.parquet | Bin 0 -> 1719 bytes .../hive/test_hive_basic_type.out | 81 +++++++++++++++++++++ .../hive/test_hive_basic_type.groovy | 23 ++++++ 9 files changed, 135 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 6cf3cfb6c50..8054f9b88e6 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -619,7 +619,23 @@ inline Status get_converter(tparquet::Type::type parquet_physical_type, Primitiv DecimalScaleParams::NO_SCALE>>(); \ } \ } \ - } else if (tparquet::Type::INT32 == parquet_physical_type) { \ + } else if (tparquet::Type::BYTE_ARRAY == parquet_physical_type) { \ + convert_params->init_decimal_converter<PRIMARY_TYPE>(dst_data_type); \ + using ValueCopyType = DECIMAL_TYPE::NativeType; \ + if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) { \ + *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, ValueCopyType, \ + DecimalScaleParams::SCALE_UP>>(); \ + } else if (scale_params.scale_type == DecimalScaleParams::SCALE_DOWN) { \ + *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, ValueCopyType, \ + DecimalScaleParams::SCALE_DOWN>>(); \ + } else { \ + *converter = std::make_unique<StringToDecimal<DECIMAL_TYPE, ValueCopyType, \ + DecimalScaleParams::NO_SCALE>>(); \ + } \ + \ + } \ + \ + else if (tparquet::Type::INT32 == parquet_physical_type) { \ if (scale_params.scale_type == DecimalScaleParams::SCALE_UP) { \ *converter = std::make_unique<NumberToDecimal<Int32, PRIMARY_TYPE, int64_t, \ DecimalScaleParams::SCALE_UP>>(); \ diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index 740bf2fa584..a6d5c212a12 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -1762,6 +1762,20 @@ LOCATION msck repair table orc_decimal_table; +CREATE TABLE `parquet_decimal_bool`( + decimals decimal(20,3), + bool_rle boolean +) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +LOCATION + '/user/doris/preinstalled_data/parquet_table/parquet_decimal_bool'; + +msck repair table partition_table; show tables; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet new file mode 100644 index 00000000000..060bdf23bce Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet new file mode 100644 index 00000000000..6279cdd873f Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/null2.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet new file mode 100644 index 00000000000..dbee42cece3 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet new file mode 100644 index 00000000000..5ca4106056a Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_1.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet new file mode 100644 index 00000000000..e1463724f4f Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal_bool/test_copy_2.parquet differ diff --git a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out index 0398050f02b..94de65a4979 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out +++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out @@ -181,3 +181,84 @@ test DATETIME(6) Yes true \N \N \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] \N \N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] +-- !parquet -- +219 + +-- !parquet1 -- +-7871.416 false +-7871.416 false +-7871.416 false +-7871.416 true +-7871.416 true +-7871.416 true +-7871.416 true + +-- !parquet2 -- +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 + +-- !parquet3 -- +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 + +-- !parquet4 -- +-2633.645 +-2633.645 +-2633.645 +-2633.645 +-2633.645 +-2633.645 +-2633.645 + +-- !parquet5 -- +-7871.416 +-7871.416 +-7871.416 +-7871.416 +-7871.416 +-7871.416 +-7871.416 + +-- !parquet7 -- +true +true +true +true +true +true +true + +-- !parquet8 -- +true +true +true +true +true +true +true + +-- !parquet9 -- +116 + +-- !parquet10 -- +123 + +-- !parquet11 -- +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 +123.456 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy index bf58eb163c7..84720216fe0 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_basic_type.groovy @@ -108,6 +108,29 @@ suite("test_hive_basic_type", "external_docker,hive,external_docker_hive,p0,exte // orc_all_types_t predicate test order_qt_41 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_t where t_int = 3;""" + //test parquet byte_array_decimal and rle_bool + order_qt_parquet """ select count(*) from ${catalog_name}.${ex_db_name}.parquet_decimal_bool """ + order_qt_parquet1 """ select * from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals is not null and bool_rle is not null order by decimals,bool_rle limit 7 """ + order_qt_parquet2 """ select decimals from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals is not null and decimals > 1 order by decimals limit 7 """ + order_qt_parquet3 """ select decimals from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals = 123.456 order by decimals limit 7 """ + order_qt_parquet4 """ select decimals from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals != -7871.416 and decimals is not null order by decimals limit 7 """ + + order_qt_parquet5 """ select decimals from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals is not null and decimals < 0 order by decimals limit 7 """ + + order_qt_parquet7 """ select bool_rle from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where bool_rle is not null and bool_rle = 1 limit 7 """ + order_qt_parquet8 """ select bool_rle from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where bool_rle is not null and bool_rle = 1 limit 7 """ + order_qt_parquet9 """ select count(bool_rle) from ${catalog_name}.${ex_db_name}.parquet_decimal_bool; """ + order_qt_parquet10 """ select count(decimals) from ${catalog_name}.${ex_db_name}.parquet_decimal_bool; """ + order_qt_parquet11 """ select decimals from ${catalog_name}.${ex_db_name}.parquet_decimal_bool + where decimals is not null and decimals > 1 order by decimals limit 7 """ + //sql """drop catalog if exists ${catalog_name} """ } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org