This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1-lakehouse
in repository https://gitbox.apache.org/repos/asf/doris.git

commit da212850cc9824ad2e6186b294d0cb2e063f02f3
Author: morningman <morning...@163.com>
AuthorDate: Sat Feb 15 11:20:02 2025 +0800

    Revert "branch-2.1: [Fix](ORC) Not push down fixed char type in orc reader #45484 (#45525)"
    
    This reverts commit 7d32e4f71ff5ea6700af11223bd7970c572cb6d6.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         |  30 ++++++++-----
 be/src/vec/exec/format/orc/vorc_reader.h           |   4 +-
 be/src/vec/exec/scan/vfile_scanner.cpp             |  10 ++++-
 .../orc_predicate/orc_predicate_table.hql          |  16 -------
 .../data/multi_catalog/orc_predicate/run.sh        |   9 ----
 .../hive/test_hive_orc_predicate.out               | Bin 463 -> 0 bytes
 .../hive/test_hive_orc_predicate.groovy            |  50 ---------------------
 7 files changed, 32 insertions(+), 87 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 996effd554e..b175ce7ace1 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -143,7 +143,7 @@ void ORCFileInputStream::read(void* buf, uint64_t length, uint64_t offset) {
 OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state,
                      const TFileScanRangeParams& params, const TFileRangeDesc& range,
                      size_t batch_size, const std::string& ctz, io::IOContext* io_ctx,
-                     bool enable_lazy_mat)
+                     bool enable_lazy_mat, std::vector<orc::TypeKind>* unsupported_pushdown_types)
         : _profile(profile),
           _state(state),
           _scan_params(params),
@@ -156,7 +156,8 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state,
           _enable_lazy_mat(enable_lazy_mat),
           _enable_filter_by_min_max(
                   state == nullptr ? true : state->query_options().enable_orc_filter_by_min_max),
-          _dict_cols_has_converted(false) {
+          _dict_cols_has_converted(false),
+          _unsupported_pushdown_types(unsupported_pushdown_types) {
     TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
     VecDateTimeValue t;
     t.from_unixtime(0, ctz);
@@ -452,8 +453,7 @@ static std::unordered_map<orc::TypeKind, orc::PredicateDataType> TYPEKIND_TO_PRE
         {orc::TypeKind::DOUBLE, orc::PredicateDataType::FLOAT},
         {orc::TypeKind::STRING, orc::PredicateDataType::STRING},
         {orc::TypeKind::BINARY, orc::PredicateDataType::STRING},
-        // should not pust down CHAR type, because CHAR type is fixed length and will be padded
-        // {orc::TypeKind::CHAR, orc::PredicateDataType::STRING},
+        {orc::TypeKind::CHAR, orc::PredicateDataType::STRING},
         {orc::TypeKind::VARCHAR, orc::PredicateDataType::STRING},
         {orc::TypeKind::DATE, orc::PredicateDataType::DATE},
         {orc::TypeKind::DECIMAL, orc::PredicateDataType::DECIMAL},
@@ -483,9 +483,8 @@ std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, con
             [[fallthrough]];
         case orc::TypeKind::BINARY:
             [[fallthrough]];
-        // should not pust down CHAR type, because CHAR type is fixed length and will be padded
-        // case orc::TypeKind::CHAR:
-        //     [[fallthrough]];
+        case orc::TypeKind::CHAR:
+            [[fallthrough]];
         case orc::TypeKind::VARCHAR: {
             StringRef* string_value = (StringRef*)value;
             return std::make_tuple(true, orc::Literal(string_value->data, string_value->size));
@@ -561,7 +560,8 @@ std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, con
 
 template <PrimitiveType primitive_type>
 std::vector<OrcPredicate> value_range_to_predicate(
-        const ColumnValueRange<primitive_type>& col_val_range, const orc::Type* type) {
+        const ColumnValueRange<primitive_type>& col_val_range, const orc::Type* type,
+        std::vector<orc::TypeKind>* unsupported_pushdown_types) {
     std::vector<OrcPredicate> predicates;
 
     PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type;
@@ -572,6 +572,16 @@ std::vector<OrcPredicate> value_range_to_predicate(
         }
     }
 
+    if (unsupported_pushdown_types != nullptr) {
+        for (vector<orc::TypeKind>::iterator it = unsupported_pushdown_types->begin();
+             it != unsupported_pushdown_types->end(); ++it) {
+            if (*it == type->getKind()) {
+                // Unsupported type
+                return predicates;
+            }
+        }
+    }
+
     orc::PredicateDataType predicate_data_type;
     auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind());
     if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) {
@@ -713,8 +723,8 @@ bool OrcReader::_init_search_argument(
         }
         std::visit(
                 [&](auto& range) {
-                    std::vector<OrcPredicate> value_predicates =
-                            value_range_to_predicate(range, type_it->second);
+                    std::vector<OrcPredicate> value_predicates = value_range_to_predicate(
+                            range, type_it->second, _unsupported_pushdown_types);
                     for (auto& range_predicate : value_predicates) {
                         predicates.emplace_back(range_predicate);
                     }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 95afe21e144..b286b714ad9 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -133,7 +133,8 @@ public:
 
     OrcReader(RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params,
               const TFileRangeDesc& range, size_t batch_size, const std::string& ctz,
-              io::IOContext* io_ctx, bool enable_lazy_mat = true);
+              io::IOContext* io_ctx, bool enable_lazy_mat = true,
+              std::vector<orc::TypeKind>* unsupported_pushdown_types = nullptr);
 
     OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range,
               const std::string& ctz, io::IOContext* io_ctx, bool enable_lazy_mat = true);
@@ -618,6 +619,7 @@ private:
     std::unique_ptr<StringDictFilterImpl> _string_dict_filter;
     bool _dict_cols_has_converted = false;
     bool _has_complex_type = false;
+    std::vector<orc::TypeKind>* _unsupported_pushdown_types;
 
     // resolve schema change
     std::unordered_map<std::string, std::unique_ptr<converter::ColumnTypeConverter>> _converters;
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index f4376cc1e70..7c65faac81c 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -893,9 +893,17 @@ Status VFileScanner::_get_next_reader() {
             break;
         }
         case TFileFormatType::FORMAT_ORC: {
+            std::vector<orc::TypeKind>* unsupported_pushdown_types = nullptr;
+            if (range.__isset.table_format_params &&
+                range.table_format_params.table_format_type == "paimon") {
+                static std::vector<orc::TypeKind> paimon_unsupport_type =
+                        std::vector<orc::TypeKind> {orc::TypeKind::CHAR};
+                unsupported_pushdown_types = &paimon_unsupport_type;
+            }
             std::unique_ptr<OrcReader> orc_reader = OrcReader::create_unique(
                     _profile, _state, *_params, range, _state->query_options().batch_size,
-                    _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat);
+                    _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat,
+                    unsupported_pushdown_types);
             orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
             if (push_down_predicates) {
                 RETURN_IF_ERROR(_process_late_arrival_conjuncts());
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql
deleted file mode 100644
index a946b25ff1a..00000000000
--- a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql
+++ /dev/null
@@ -1,16 +0,0 @@
-CREATE DATABASE IF NOT EXISTS multi_catalog;
-USE multi_catalog;
-
-create table fixed_char_table (
-  i int,
-  c char(2)
-) stored as orc;
-
-insert into fixed_char_table values(1,'a'),(2,'b '), (3,'cd');
-
-create table type_changed_table (
-  id int,
-  name string 
-) stored as orc;
-insert into type_changed_table values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie');
-ALTER TABLE type_changed_table CHANGE COLUMN id id STRING;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh
deleted file mode 100755
index f934ff3009c..00000000000
--- a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-set -x
-
-CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-
-# create table
-hive -f "${CUR_DIR}"/orc_predicate_table.hql
-
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out
deleted file mode 100644
index f42bb629550..00000000000
Binary files a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out and /dev/null differ
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy
deleted file mode 100644
index d9b6357ca0a..00000000000
--- a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_docker_hive") {
-
-    String enabled = context.config.otherConfigs.get("enableHiveTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("disable Hive test.")
-        return;
-    }
-
-    for (String hivePrefix : ["hive2", "hive3"]) {
-        try {
-            String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
-            String catalog_name = "${hivePrefix}_test_predicate"
-            String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-
-            sql """drop catalog if exists ${catalog_name}"""
-            sql """create catalog if not exists ${catalog_name} properties (
-                "type"="hms",
-                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
-            );"""
-            sql """use `${catalog_name}`.`multi_catalog`"""
-
-            qt_predicate_fixed_char1 """ select * from fixed_char_table where c = 'a';"""
-            qt_predicate_fixed_char2 """ select * from fixed_char_table where c = 'a ';"""
-
-            qt_predicate_changed_type1 """ select * from type_changed_table where id = '1';"""
-            qt_predicate_changed_type2 """ select * from type_changed_table where id = '2';"""
-            qt_predicate_changed_type3 """ select * from type_changed_table where id = '3';"""
-
-            sql """drop catalog if exists ${catalog_name}"""
-        } finally {
-        }
-    }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to