This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7672034aef6 [minor](parquet) Refine parquet reader (#56105)
7672034aef6 is described below

commit 7672034aef6e1a9bf23074380e52560eb2dc3dbb
Author: Gabriel <[email protected]>
AuthorDate: Wed Sep 17 09:50:56 2025 +0800

    [minor](parquet) Refine parquet reader (#56105)
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp  |  4 +++-
 be/src/vec/exec/format/parquet/vparquet_reader.h    |  3 ++-
 be/src/vec/exec/format/table/iceberg_reader.h       |  7 ++++---
 .../vec/exec/format/table/table_format_reader.cpp   | 11 +++++------
 be/src/vec/exec/format/table/table_format_reader.h  | 21 ++++++++++++---------
 5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 82b7bcf9c42..17fe7f4f5fd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -126,11 +126,13 @@ ParquetReader::~ParquetReader() {
     _close_internal();
 }
 
+#ifdef BE_TEST
 // for unit test
 void ParquetReader::set_file_reader(io::FileReaderSPtr file_reader) {
     _file_reader = file_reader;
     _tracing_file_reader = file_reader;
 }
+#endif
 
 void ParquetReader::_init_profile() {
     if (_profile != nullptr) {
@@ -1030,7 +1032,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
         }
     };
 
-    if ((!_enable_filter_by_min_max) || _lazy_read_ctx.has_complex_type ||
+    if (!_enable_filter_by_min_max || _lazy_read_ctx.has_complex_type ||
         _lazy_read_ctx.conjuncts.empty()) {
         read_whole_row_group();
         return Status::OK();
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h
index e12a07062d5..849a69673c5 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@@ -108,8 +108,10 @@ public:
                   bool enable_lazy_mat = true);
 
     ~ParquetReader() override;
+#ifdef BE_TEST
     // for unit test
     void set_file_reader(io::FileReaderSPtr file_reader);
+#endif
 
     Status init_reader(
             const std::vector<std::string>& all_column_names,
@@ -242,7 +244,6 @@ private:
     bool _check_slot_can_push_down(const VExprSPtr& expr);
     bool _check_other_children_is_literal(const VExprSPtr& expr);
 
-private:
     RuntimeProfile* _profile = nullptr;
     const TFileScanRangeParams& _scan_params;
     const TFileRangeDesc& _scan_range;
diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h
index 0fb07b2b8ff..188e31f0378 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.h
+++ b/be/src/vec/exec/format/table/iceberg_reader.h
@@ -173,9 +173,6 @@ public:
             const VExprContextSPtrs* not_single_slot_filter_conjuncts,
             const std::unordered_map<int, VExprContextSPtrs>* 
slot_id_to_filter_conjuncts);
 
-    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
-                                      DeleteFile* position_delete) final;
-
     void set_delete_rows() final {
         auto* parquet_reader = (ParquetReader*)(_file_format_reader.get());
         parquet_reader->set_delete_rows(&_iceberg_delete_rows);
@@ -189,6 +186,10 @@ protected:
                                             
const_cast<cctz::time_zone*>(&_state->timezone_obj()),
                                             _io_ctx, _state, _meta_cache);
     }
+
+private:
+    Status _read_position_delete_file(const TFileRangeDesc* delete_range,
+                                      DeleteFile* position_delete) final;
 };
 class IcebergOrcReader final : public IcebergTableReader {
 public:
diff --git a/be/src/vec/exec/format/table/table_format_reader.cpp b/be/src/vec/exec/format/table/table_format_reader.cpp
index 4de4f2e2587..b3b7e1df336 100644
--- a/be/src/vec/exec/format/table/table_format_reader.cpp
+++ b/be/src/vec/exec/format/table/table_format_reader.cpp
@@ -627,17 +627,16 @@ std::string TableSchemaChangeHelper::debug(const std::shared_ptr<Node>& root, si
         ans += prefix + "ScalarNode\n";
     } else if (auto struct_node = std::dynamic_pointer_cast<StructNode>(root)) 
{
         ans += prefix + "StructNode\n";
-        for (const auto& [table_col_name, value] : 
struct_node->get_childrens()) {
-            const auto& [child_node, file_col_name, exist] = value;
+        for (const auto& [table_col_name, value] : 
struct_node->get_children()) {
             ans += indent(level + 1) + table_col_name;
-            if (exist) {
-                ans += " (file: " + file_col_name + ")";
+            if (value.exists) {
+                ans += " (file: " + value.column_name + ")";
             } else {
                 ans += " (not exists)";
             }
             ans += "\n";
-            if (child_node) {
-                ans += debug(child_node, level + 2);
+            if (value.node) {
+                ans += debug(value.node, level + 2);
             }
         }
     } else if (auto array_node = std::dynamic_pointer_cast<ArrayNode>(root)) {
diff --git a/be/src/vec/exec/format/table/table_format_reader.h b/be/src/vec/exec/format/table/table_format_reader.h
index 82687c28271..1f2a41af930 100644
--- a/be/src/vec/exec/format/table/table_format_reader.h
+++ b/be/src/vec/exec/format/table/table_format_reader.h
@@ -126,7 +126,6 @@ class TableSchemaChangeHelper {
 public:
     ~TableSchemaChangeHelper() = default;
 
-public:
     class Node {
     public:
         virtual ~Node() = default;
@@ -169,40 +168,44 @@ public:
     class ScalarNode : public Node {};
 
     class StructNode : public Node {
-        using ChildrenType = std::tuple<std::shared_ptr<Node>, std::string, 
bool>;
+        struct StructChild {
+            const std::shared_ptr<Node> node;
+            const std::string column_name;
+            const bool exists;
+        };
 
         // table column name -> { node, file_column_name, exists_in_file}
-        std::map<std::string, ChildrenType> children;
+        std::map<std::string, StructChild> children;
 
     public:
         std::shared_ptr<Node> get_children_node(std::string table_column_name) 
const override {
             DCHECK(children.contains(table_column_name));
             DCHECK(children_column_exists(table_column_name));
-            return std::get<0>(children.at(table_column_name));
+            return children.at(table_column_name).node;
         }
 
         std::string children_file_column_name(std::string table_column_name) 
const override {
             DCHECK(children.contains(table_column_name));
             DCHECK(children_column_exists(table_column_name));
-            return std::get<1>(children.at(table_column_name));
+            return children.at(table_column_name).column_name;
         }
 
         bool children_column_exists(std::string table_column_name) const 
override {
             DCHECK(children.contains(table_column_name));
-            return std::get<2>(children.at(table_column_name));
+            return children.at(table_column_name).exists;
         }
 
         void add_not_exist_children(std::string table_column_name) override {
-            children.emplace(table_column_name, std::make_tuple(nullptr, "", 
false));
+            children.emplace(table_column_name, StructChild {nullptr, "", 
false});
         }
 
         void add_children(std::string table_column_name, std::string 
file_column_name,
                           std::shared_ptr<Node> children_node) override {
             children.emplace(table_column_name,
-                             std::make_tuple(children_node, file_column_name, 
true));
+                             StructChild {children_node, file_column_name, 
true});
         }
 
-        const std::map<std::string, ChildrenType>& get_childrens() const { 
return children; }
+        const std::map<std::string, StructChild>& get_children() const { 
return children; }
     };
 
     class ArrayNode : public Node {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to