This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1-lakehouse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 5b3591b510ac91a3a26f08da788e57bbeff12686 Author: Socrates <[email protected]> AuthorDate: Tue Feb 11 12:02:44 2025 +0800 [fix](orc) remove unnecessary fields of orc_reader (#47506) ### What problem does this PR solve? remove unnecessary fields of orc_reader: - remove `_col_name_to_file_col_name_low_case` by storing original field name in `type_map` - add comment to describe the functionality of these mappings --- be/src/vec/exec/format/orc/vorc_reader.cpp | 18 +++++++----------- be/src/vec/exec/format/orc/vorc_reader.h | 4 ++-- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index fd3ca829517..104fd4b9744 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -391,9 +391,6 @@ Status OrcReader::_init_read_columns() { } _col_name_to_file_col_name[col_name] = read_col; - // TODO: refactor this - std::transform(read_col.begin(), read_col.end(), read_col.begin(), ::tolower); - _col_name_to_file_col_name_low_case[col_name] = read_col; } } return Status::OK(); @@ -410,9 +407,9 @@ void OrcReader::_init_orc_cols(const orc::Type& type, std::vector<std::string>& if (hive1_orc) { hive1_orc = _is_hive1_col_name(filed_name_lower_case); } - auto filed_name_lower_case_copy = filed_name_lower_case; orc_cols_lower_case.emplace_back(std::move(filed_name_lower_case)); - type_map.emplace(std::move(filed_name_lower_case_copy), type.getSubtype(i)); + auto file_name = type.getFieldName(i); + type_map.emplace(std::move(file_name), type.getSubtype(i)); if (_is_acid) { const orc::Type* sub_type = type.getSubtype(i); if (sub_type->getKind() == orc::TypeKind::STRUCT) { @@ -569,15 +566,14 @@ std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, std::tuple<bool, orc::Literal, orc::PredicateDataType> OrcReader::_make_orc_literal( const VSlotRef* slot_ref, const VLiteral* literal) { - 
DCHECK(_col_name_to_file_col_name_low_case.contains(slot_ref->expr_name())); - auto file_col_name_low_case = _col_name_to_file_col_name_low_case[slot_ref->expr_name()]; - if (!_type_map.contains(file_col_name_low_case)) { - // TODO: this is for acid table + DCHECK(_col_name_to_file_col_name.contains(slot_ref->expr_name())); + auto file_col_name = _col_name_to_file_col_name[slot_ref->expr_name()]; + if (!_type_map.contains(file_col_name)) { LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in _type_map"; return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); } - DCHECK(_type_map.contains(file_col_name_low_case)); - const auto* orc_type = _type_map[file_col_name_low_case]; + DCHECK(_type_map.contains(file_col_name)); + const auto* orc_type = _type_map[file_col_name]; if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) { LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << orc_type->getKind() << "]"; return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 88f4854f056..e6692ae4647 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -593,9 +593,9 @@ private: // 2. If true, use indexes instead of column names when reading orc tables. 
bool _is_hive1_orc_or_use_idx = false; + // map col name in metastore to col name in orc file std::unordered_map<std::string, std::string> _col_name_to_file_col_name; - // TODO: check if we can remove _col_name_to_file_col_name_low_case - std::unordered_map<std::string, std::string> _col_name_to_file_col_name_low_case; + // map col name in orc file to orc type std::unordered_map<std::string, const orc::Type*> _type_map; std::vector<const orc::Type*> _col_orc_type; std::unique_ptr<ORCFileInputStream> _file_input_stream; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
