This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new b399ed36ad8 [fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0) type of orc file #41795 (#42298) b399ed36ad8 is described below commit b399ed36ad8f66a99dadbe1db083f08ad391d3c1 Author: Rayner Chen <morning...@163.com> AuthorDate: Wed Oct 23 17:39:31 2024 +0800 [fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0) type of orc file #41795 (#42298) cherry pick from #41795 Co-authored-by: Tiewei Fang <43782773+bepppo...@users.noreply.github.com> --- be/src/vec/exec/format/orc/vorc_reader.cpp | 9 +++++++++ be/src/vec/exec/format/orc/vorc_reader.h | 1 - .../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out | 7 +++++++ .../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out | 12 ++++++++++++ .../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy | 10 +++++----- .../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy | 6 ++++++ 6 files changed, 39 insertions(+), 6 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index bb13aa36ef5..6b6639f2feb 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -96,6 +96,11 @@ namespace doris::vectorized { // TODO: we need to determine it by test. static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits<uint32_t>::max(); static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = ""; +// Because HIVE 0.11 & 0.12 does not support precision and scale for decimal +// The decimal type of orc file produced by HIVE 0.11 & 0.12 are DECIMAL(0,0) +// We should set a default precision and scale for these orc files. +static constexpr int decimal_precision_for_hive11 = BeConsts::MAX_DECIMAL128_PRECISION; +static constexpr int decimal_scale_for_hive11 = 10; #define FOR_FLAT_ORC_COLUMNS(M) \ M(TypeIndex::Int8, Int8, orc::LongVectorBatch) \ @@ -1050,6 +1055,10 @@ TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) { case orc::TypeKind::TIMESTAMP: return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2); case orc::TypeKind::DECIMAL: + if (orc_type->getPrecision() == 0) { + return TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11, + decimal_scale_for_hive11); + } return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(), orc_type->getScale()); case orc::TypeKind::DATE: diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index c0b372dfcea..4aad5637ef5 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -587,7 +587,6 @@ private: std::unique_ptr<orc::Reader> _reader; std::unique_ptr<orc::RowReader> _row_reader; std::unique_ptr<ORCFilterImpl> _orc_filter; - orc::ReaderOptions _reader_options; orc::RowReaderOptions _row_reader_options; std::shared_ptr<io::FileSystem> _file_system; diff --git a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out index 12864d9c8a4..1e27e511487 100644 --- a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out +++ b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out @@ -9,6 +9,13 @@ 2014-02-11 8200-02-11 +-- !test_2 -- +12345678.6547450000 +12345678.6547450000 +12345678.6547450000 +12345678.6547450000 +12345678.6547450000 + -- !test_3 -- 2 foo 0.8 1 1969-12-31T16:00 5 eat 0.8 6 1969-12-31T16:00:20 diff --git a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out index 3046384b928..9b5840ac0cd 100644 --- a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out +++ b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out @@ -21,3 +21,15 @@ row 000009 Alyssa \N [3, 9, 15, 20] Ben red [] +-- !test_4 -- +2 foo 0.8 1.2000000000 1969-12-31T16:00 +5 eat 0.8 5.5000000000 1969-12-31T16:00:20 +13 bar 80.0 2.2000000000 1969-12-31T16:00:05 +29 cat 8.0 3.3000000000 1969-12-31T16:00:10 +70 dog 1.8 4.4000000000 1969-12-31T16:00:15 +100 zebra 8.0 0E-10 1969-12-31T16:04:10 +100 zebra 8.0 0E-10 1969-12-31T16:04:10 +100 zebra 8.0 0E-10 1969-12-31T16:04:10 +100 zebra 8.0 0E-10 1969-12-31T16:04:10 +100 zebra 8.0 0E-10 1969-12-31T16:04:10 + diff --git a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy index 44176a47fd4..e42b745bfae 100644 --- a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy +++ b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy @@ -41,11 +41,11 @@ suite("test_hdfs_orc_group1_orc_files","external,hive,tvf,external_docker") { // Doris cannot read this ORC file because of a NOT_IMPLEMENT error. - // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc" - // order_qt_test_2 """ select * from HDFS( - // "uri" = "${uri}", - // "hadoop.username" = "${hdfsUserName}", - // "format" = "orc"); """ + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc" + order_qt_test_2 """ select decimal1 from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc") limit 5; """ uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc_split_elim.orc" diff --git a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy index 4495494a3f0..daf1d2a1383 100644 --- a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy +++ b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy @@ -49,6 +49,12 @@ suite("test_hdfs_orc_group2_orc_files","external,hive,tvf,external_docker") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "orc"); """ + + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group2/orc_split_elim.orc" + qt_test_4 """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc") order by userid limit 10; """ } finally { } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org