This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b399ed36ad8 [fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0) type of orc file #41795 (#42298)
b399ed36ad8 is described below

commit b399ed36ad8f66a99dadbe1db083f08ad391d3c1
Author: Rayner Chen <morning...@163.com>
AuthorDate: Wed Oct 23 17:39:31 2024 +0800

    [fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0) type of orc file #41795 (#42298)
    
    cherry pick from #41795
    
    Co-authored-by: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
---
 be/src/vec/exec/format/orc/vorc_reader.cpp                   |  9 +++++++++
 be/src/vec/exec/format/orc/vorc_reader.h                     |  1 -
 .../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out           |  7 +++++++
 .../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out           | 12 ++++++++++++
 .../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy        | 10 +++++-----
 .../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy        |  6 ++++++
 6 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index bb13aa36ef5..6b6639f2feb 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -96,6 +96,11 @@ namespace doris::vectorized {
 // TODO: we need to determine it by test.
 static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits<uint32_t>::max();
 static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = "";
+// HIVE 0.11 & 0.12 do not support precision and scale for decimal, so the
+// decimal type in ORC files produced by those versions is DECIMAL(0,0).
+// We set a default precision and scale when reading these ORC files.
+static constexpr int decimal_precision_for_hive11 = BeConsts::MAX_DECIMAL128_PRECISION;
+static constexpr int decimal_scale_for_hive11 = 10;
 
 #define FOR_FLAT_ORC_COLUMNS(M)                            \
     M(TypeIndex::Int8, Int8, orc::LongVectorBatch)         \
@@ -1050,6 +1055,10 @@ TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) {
     case orc::TypeKind::TIMESTAMP:
         return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2);
     case orc::TypeKind::DECIMAL:
+        if (orc_type->getPrecision() == 0) {
+            return TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11,
+                                                         decimal_scale_for_hive11);
+        }
         return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(),
                                                      orc_type->getScale());
     case orc::TypeKind::DATE:
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index c0b372dfcea..4aad5637ef5 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -587,7 +587,6 @@ private:
     std::unique_ptr<orc::Reader> _reader;
     std::unique_ptr<orc::RowReader> _row_reader;
     std::unique_ptr<ORCFilterImpl> _orc_filter;
-    orc::ReaderOptions _reader_options;
     orc::RowReaderOptions _row_reader_options;
 
     std::shared_ptr<io::FileSystem> _file_system;
diff --git a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
index 12864d9c8a4..1e27e511487 100644
--- a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
+++ b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
@@ -9,6 +9,13 @@
 2014-02-11
 8200-02-11
 
+-- !test_2 --
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+
 -- !test_3 --
 2      foo     0.8     1       1969-12-31T16:00
 5      eat     0.8     6       1969-12-31T16:00:20
diff --git a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
index 3046384b928..9b5840ac0cd 100644
--- a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
+++ b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
@@ -21,3 +21,15 @@ row 000009
 Alyssa \N      [3, 9, 15, 20]
 Ben    red     []
 
+-- !test_4 --
+2      foo     0.8     1.2000000000    1969-12-31T16:00
+5      eat     0.8     5.5000000000    1969-12-31T16:00:20
+13     bar     80.0    2.2000000000    1969-12-31T16:00:05
+29     cat     8.0     3.3000000000    1969-12-31T16:00:10
+70     dog     1.8     4.4000000000    1969-12-31T16:00:15
+100    zebra   8.0     0E-10   1969-12-31T16:04:10
+100    zebra   8.0     0E-10   1969-12-31T16:04:10
+100    zebra   8.0     0E-10   1969-12-31T16:04:10
+100    zebra   8.0     0E-10   1969-12-31T16:04:10
+100    zebra   8.0     0E-10   1969-12-31T16:04:10
+
diff --git a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
index 44176a47fd4..e42b745bfae 100644
--- a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
+++ b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
@@ -41,11 +41,11 @@ suite("test_hdfs_orc_group1_orc_files","external,hive,tvf,external_docker") {
 
             // Doris cannot read this ORC file because of a NOT_IMPLEMENT error.
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
-            // order_qt_test_2 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "orc"); """
+            uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
+            order_qt_test_2 """ select decimal1 from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "orc") limit 5; """
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc_split_elim.orc"
diff --git a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
index 4495494a3f0..daf1d2a1383 100644
--- a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
+++ b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
@@ -49,6 +49,12 @@ suite("test_hdfs_orc_group2_orc_files","external,hive,tvf,external_docker") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "orc"); """
+
+            uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group2/orc_split_elim.orc"
+            qt_test_4 """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "orc") order by userid limit 10; """
         } finally {
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to