This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/nested_column_prune by this 
push:
     new 91d6df510e6 add test cases
91d6df510e6 is described below

commit 91d6df510e63400679682c22367f211ea5253c66
Author: Hu Shenggang <[email protected]>
AuthorDate: Wed Oct 22 18:15:04 2025 +0800

    add test cases
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    | 101 +++++-----
 be/src/olap/rowset/segment_v2/column_reader.h      |   1 +
 .../olap/rowset/segment_v2/column_reader_test.cpp  | 221 +++++++++++++++++++++
 3 files changed, 271 insertions(+), 52 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 41702a463a1..48915887a79 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -77,6 +77,7 @@
 #include "vec/common/assert_cast.h"
 #include "vec/common/schema_util.h"
 #include "vec/common/string_ref.h"
+#include "vec/common/typeid_cast.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type_agg_state.h"
 #include "vec/data_types/data_type_factory.hpp"
@@ -925,6 +926,48 @@ Status 
ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator,
     return Status::OK();
 }
 
+Result<TColumnAccessPaths> ColumnIterator::_get_sub_access_paths( // validates the head of every name access path, strips it, and returns what is left
+        const TColumnAccessPaths& access_paths) {
+    TColumnAccessPaths sub_access_paths = access_paths; // work on a copy; the argument is const and stays untouched
+    for (auto it = sub_access_paths.name_access_paths.begin();
+         it != sub_access_paths.name_access_paths.end();) { // no increment here: the body either advances or erases
+        TColumnNameAccessPath& name_path = *it;
+        if (name_path.path.empty()) { // NOTE(review): the message below says "struct column", but the map and array iterators call this helper too
+            return ResultError(
+                    Status::InternalError("Invalid access path for struct 
column: path is empty"));
+        }
+
+        if (name_path.path[0] != _column_name) { // head is not this column's own name: only some iterator kinds allow that
+            if (typeid_cast<ArrayFileColumnIterator*>(this) != nullptr) { // array iterator: "*" is the only accepted alternative
+                if (name_path.path[0] != "*") {
+                    return ResultError(Status::InternalError(
+                            R"(Invalid access path for array column: expected 
name "{}", got "{}")",
+                            _column_name, name_path.path[0]));
+                }
+            } else if (typeid_cast<MapFileColumnIterator*>(this) != nullptr) { // map iterator: "KEYS", "VALUES" or "*" are accepted
+                if (name_path.path[0] != "KEYS" && name_path.path[0] != 
"VALUES" &&
+                    name_path.path[0] != "*") {
+                    return ResultError(Status::InternalError(
+                            R"(Invalid access path for map column: expected 
name "{}", got "{}")",
+                            _column_name, name_path.path[0]));
+                }
+            } else { // any other iterator kind: the head must equal _column_name
+                return ResultError(Status::InternalError(
+                        R"(Invalid access path for column: expected name "{}", 
got "{}")",
+                        _column_name, name_path.path[0]));
+            }
+        }
+
+        name_path.path.erase(name_path.path.begin()); // drop the validated head component
+        if (!name_path.path.empty()) {
+            ++it; // something remains for the sub-columns: keep this path
+        } else {
+            it = sub_access_paths.name_access_paths.erase(it); // path had only the head: remove it from the result
+        }
+    }
+    return sub_access_paths; // may contain no name paths at all; callers test name_access_paths.empty()
+}
+
 ///====================== MapFileColumnIterator 
============================////
 MapFileColumnIterator::MapFileColumnIterator(std::shared_ptr<ColumnReader> 
reader,
                                              ColumnIteratorUPtr null_iterator,
@@ -1061,23 +1104,8 @@ Status MapFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_acc
                   << " to READING_FOR_PREDICATE";
     }
 
-    auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) -> 
TColumnAccessPaths {
-        TColumnAccessPaths sub_access_paths = access_paths;
-        for (auto it = sub_access_paths.name_access_paths.begin();
-             it != sub_access_paths.name_access_paths.end();) {
-            TColumnNameAccessPath& name_path = *it;
-            if (name_path.path.size() > 1) {
-                name_path.path.erase(name_path.path.begin());
-                ++it;
-            } else {
-                it = sub_access_paths.name_access_paths.erase(it);
-            }
-        }
-        return sub_access_paths;
-    };
-
-    auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
-    auto sub_predicate_access_paths = 
get_sub_access_paths(predicate_access_paths);
+    auto sub_all_access_paths = 
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+    auto sub_predicate_access_paths = 
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
 
     if (sub_all_access_paths.name_access_paths.empty()) {
         return Status::OK();
@@ -1243,24 +1271,8 @@ Status StructFileColumnIterator::set_access_paths(
         LOG(INFO) << "Struct column iterator set sub-column " << _column_name
                   << " to READING_FOR_PREDICATE";
     }
-
-    auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) -> 
TColumnAccessPaths {
-        TColumnAccessPaths sub_access_paths = access_paths;
-        for (auto it = sub_access_paths.name_access_paths.begin();
-             it != sub_access_paths.name_access_paths.end();) {
-            TColumnNameAccessPath& name_path = *it;
-            if (name_path.path.size() > 1) {
-                name_path.path.erase(name_path.path.begin());
-                ++it;
-            } else {
-                it = sub_access_paths.name_access_paths.erase(it);
-            }
-        }
-        return sub_access_paths;
-    };
-
-    auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
-    auto sub_predicate_access_paths = 
get_sub_access_paths(predicate_access_paths);
+    auto sub_all_access_paths = 
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+    auto sub_predicate_access_paths = 
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
 
     const auto no_sub_column_to_skip = 
sub_all_access_paths.name_access_paths.empty();
     const auto no_predicate_sub_column = 
sub_predicate_access_paths.name_access_paths.empty();
@@ -1488,23 +1500,8 @@ Status ArrayFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_a
                   << " to READING_FOR_PREDICATE";
     }
 
-    auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) -> 
TColumnAccessPaths {
-        TColumnAccessPaths sub_access_paths = access_paths;
-        for (auto it = sub_access_paths.name_access_paths.begin();
-             it != sub_access_paths.name_access_paths.end();) {
-            TColumnNameAccessPath& name_path = *it;
-            if (name_path.path.size() > 1) {
-                name_path.path.erase(name_path.path.begin());
-                ++it;
-            } else {
-                it = sub_access_paths.name_access_paths.erase(it);
-            }
-        }
-        return sub_access_paths;
-    };
-
-    auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
-    auto sub_predicate_access_paths = 
get_sub_access_paths(predicate_access_paths);
+    auto sub_all_access_paths = 
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+    auto sub_predicate_access_paths = 
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
 
     const auto no_sub_column_to_skip = 
sub_all_access_paths.name_access_paths.empty();
     const auto no_predicate_sub_column = 
sub_predicate_access_paths.name_access_paths.empty();
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h 
b/be/src/olap/rowset/segment_v2/column_reader.h
index 062f8625678..d4ae7f86ea0 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -382,6 +382,7 @@ public:
     void set_reading_flag(ReadingFlag flag) { _reading_flag = flag; }
 
 protected:
+    Result<TColumnAccessPaths> _get_sub_access_paths(const TColumnAccessPaths& 
access_paths); // returns a copy of access_paths with each path's first component removed, or an error for an invalid path
     ColumnIteratorOptions _opts;
 
     ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING};
diff --git a/be/test/olap/rowset/segment_v2/column_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
new file mode 100644
index 00000000000..3523413d9b4
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "olap/rowset/segment_v2/column_reader.h"
+
+#include <gen_cpp/Descriptors_types.h>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include "agent/be_exec_version_manager.h"
+#include "common/config.h"
+#include "gen_cpp/olap_file.pb.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "io/fs/file_reader.h"
+#include "mock/mock_segment.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
+#include "olap/rowset/segment_v2/segment.h"
+#include "olap/rowset/segment_v2/variant/variant_column_reader.h"
+#include "olap/tablet_schema.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris::segment_v2 {
+class ColumnReaderTest : public ::testing::Test { // fixture used by the TEST_F cases in this file; holds no state
+protected:
+    void SetUp() override {} // nothing to prepare
+    void TearDown() override {} // nothing to clean up
+};
+
+TEST_F(ColumnReaderTest, StructAccessPaths) { // exercises StructFileColumnIterator::set_access_paths: path validation and the resulting reading flags
+    auto create_struct_iterator = []() { // builds a struct iterator with two sub-columns named "sub_col_1" and "sub_col_2"
+        auto null_reader = std::make_shared<ColumnReader>();
+        auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+        std::vector<ColumnIteratorUPtr> sub_column_iterators;
+        auto sub_reader1 = std::make_shared<ColumnReader>();
+        auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+        sub_iterator1->set_column_name("sub_col_1");
+        auto sub_reader2 = std::make_shared<ColumnReader>();
+        auto sub_iterator2 = std::make_unique<FileColumnIterator>(sub_reader2);
+        sub_iterator2->set_column_name("sub_col_2");
+
+        sub_column_iterators.emplace_back(std::move(sub_iterator1));
+        sub_column_iterators.emplace_back(std::move(sub_iterator2));
+        auto iterator = 
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+                                                                   
std::move(null_iterator),
+                                                                   
std::move(sub_column_iterators));
+        return iterator; // the struct's own column name is not set inside this helper
+    };
+
+    auto iterator = create_struct_iterator();
+    auto st = iterator->set_access_paths(TColumnAccessPaths {}, 
TColumnAccessPaths {}); // both path sets empty
+
+    ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+    ASSERT_EQ(iterator->_reading_flag, 
ColumnIterator::ReadingFlag::NORMAL_READING);
+
+    TColumnAccessPaths all_access_paths;
+    all_access_paths.name_access_paths.emplace_back(); // one entry whose path is still empty
+
+    TColumnAccessPaths predicate_access_paths;
+    predicate_access_paths.name_access_paths.emplace_back(); // likewise: one entry, empty path
+
+    st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+    // an empty path leads to an error
+    ASSERT_FALSE(st.ok());
+
+    // Only reading sub_col_1
+    // sub_col_2 should be set to SKIP_READING
+    all_access_paths.name_access_paths[0].path = {"self", "sub_col_1"};
+
+    predicate_access_paths.name_access_paths[0].path = {"self", "sub_col_1"};
+
+    st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+    // invalid name leads to error
+    ASSERT_FALSE(st.ok());
+
+    iterator->set_column_name("self");
+    // now column name is "self", should be ok
+    st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+    ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+    ASSERT_EQ(iterator->_reading_flag, 
ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+
+    ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
+              ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+    ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
+              ColumnIterator::ReadingFlag::SKIP_READING);
+
+    // Reading all sub columns
+    all_access_paths.name_access_paths[0].path = {"self"}; // predicate paths are unchanged: still {"self", "sub_col_1"}
+    iterator = create_struct_iterator(); // start again from a fresh iterator
+    iterator->set_column_name("self");
+    st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+
+    ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+    ASSERT_EQ(iterator->_reading_flag, 
ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+
+    ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
+              ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+    ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
+              ColumnIterator::ReadingFlag::NORMAL_READING);
+}
+
+TEST_F(ColumnReaderTest, MultiAccessPaths) { // pushes one access path through nested struct / array / map iterators and checks each level's reading flag
+    auto create_struct_iterator = []() { // builds a struct iterator with two sub-columns named "sub_col_1" and "sub_col_2"
+        auto null_reader = std::make_shared<ColumnReader>();
+        auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+        std::vector<ColumnIteratorUPtr> sub_column_iterators;
+        auto sub_reader1 = std::make_shared<ColumnReader>();
+        auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+        sub_iterator1->set_column_name("sub_col_1");
+        auto sub_reader2 = std::make_shared<ColumnReader>();
+        auto sub_iterator2 = std::make_unique<FileColumnIterator>(sub_reader2);
+        sub_iterator2->set_column_name("sub_col_2");
+
+        sub_column_iterators.emplace_back(std::move(sub_iterator1));
+        sub_column_iterators.emplace_back(std::move(sub_iterator2));
+        auto iterator = 
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+                                                                   
std::move(null_iterator),
+                                                                   
std::move(sub_column_iterators));
+        return iterator;
+    };
+
+    auto create_struct_iterator2 = [](ColumnIteratorUPtr&& nested_iterator) { // struct with "sub_col_1" plus the given iterator as its second sub-column
+        auto null_reader = std::make_shared<ColumnReader>();
+        auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+        std::vector<ColumnIteratorUPtr> sub_column_iterators;
+        auto sub_reader1 = std::make_shared<ColumnReader>();
+        auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+        sub_iterator1->set_column_name("sub_col_1");
+
+        sub_column_iterators.emplace_back(std::move(sub_iterator1));
+        sub_column_iterators.emplace_back(std::move(nested_iterator)); // the caller is responsible for naming nested_iterator
+        auto iterator = 
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+                                                                   
std::move(null_iterator),
+                                                                   
std::move(sub_column_iterators));
+        return iterator;
+    };
+
+    auto struct_iterator = create_struct_iterator(); // innermost struct of the nesting
+    struct_iterator->set_column_name("struct");
+
+    auto map_iterator = std::make_unique<MapFileColumnIterator>(
+            std::make_shared<ColumnReader>(),
+            
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()), // null 
iterator
+            std::make_unique<OffsetFileColumnIterator>(
+                    
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>())),
+            
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()),
+            std::move(struct_iterator)); // presumably the map's value iterator, matching the type sketch below
+
+    auto array_iterator = std::make_unique<ArrayFileColumnIterator>(
+            std::make_shared<ColumnReader>(),
+            std::make_unique<OffsetFileColumnIterator>(
+                    
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>())),
+            std::move(map_iterator), // read back below through _item_iterator
+            
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()));
+
+    // the iterators built above form this nested type:
+    // struct<
+    //      sub_col_1,
+    //      sub_col_2: array<
+    //          map<
+    //              key,
+    //              value: struct<
+    //                  sub_col_1,
+    //                  sub_col_2
+    //              >
+    //          >
+    //      >
+    //  >
+    array_iterator->set_column_name("sub_col_2");
+    auto iterator = create_struct_iterator2(std::move(array_iterator));
+    TColumnAccessPaths all_access_paths;
+    all_access_paths.name_access_paths.emplace_back();
+
+    // all access paths:
+    // self.sub_col_2.*.KEYS
+    // predicates paths empty
+    all_access_paths.name_access_paths[0].path = {"self", "sub_col_2", "*", 
"KEYS"};
+
+    TColumnAccessPaths predicate_access_paths; // left without any name paths
+
+    iterator->set_column_name("self"); // must match the first component of the path above
+    auto st = iterator->set_access_paths(all_access_paths, 
predicate_access_paths);
+
+    ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+    ASSERT_EQ(iterator->_reading_flag, 
ColumnIterator::ReadingFlag::NORMAL_READING);
+
+    ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
+              ColumnIterator::ReadingFlag::SKIP_READING); // sub_col_1 appears in no access path
+    ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
+              ColumnIterator::ReadingFlag::NORMAL_READING);
+
+    auto* array_iter = // unchecked downcast: the second sub-column was built as an ArrayFileColumnIterator above
+            
static_cast<ArrayFileColumnIterator*>(iterator->_sub_column_iterators[1].get());
+    ASSERT_EQ(array_iter->_item_iterator->_reading_flag,
+              ColumnIterator::ReadingFlag::NORMAL_READING);
+
+    auto* map_iter = 
static_cast<MapFileColumnIterator*>(array_iter->_item_iterator.get()); // unchecked downcast of the array's item iterator
+    ASSERT_EQ(map_iter->_key_iterator->_reading_flag, 
ColumnIterator::ReadingFlag::NORMAL_READING); // the path ends in "KEYS"
+    ASSERT_EQ(map_iter->_val_iterator->_reading_flag, 
ColumnIterator::ReadingFlag::SKIP_READING); // no path asks for the values
+}
+} // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to