This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/nested_column_prune by this
push:
new 91d6df510e6 add test cases
91d6df510e6 is described below
commit 91d6df510e63400679682c22367f211ea5253c66
Author: Hu Shenggang <[email protected]>
AuthorDate: Wed Oct 22 18:15:04 2025 +0800
add test cases
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 101 +++++-----
be/src/olap/rowset/segment_v2/column_reader.h | 1 +
.../olap/rowset/segment_v2/column_reader_test.cpp | 221 +++++++++++++++++++++
3 files changed, 271 insertions(+), 52 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 41702a463a1..48915887a79 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -77,6 +77,7 @@
#include "vec/common/assert_cast.h"
#include "vec/common/schema_util.h"
#include "vec/common/string_ref.h"
+#include "vec/common/typeid_cast.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_agg_state.h"
#include "vec/data_types/data_type_factory.hpp"
@@ -925,6 +926,48 @@ Status
ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator,
return Status::OK();
}
+Result<TColumnAccessPaths> ColumnIterator::_get_sub_access_paths(
+ const TColumnAccessPaths& access_paths) { // Validates the first component of every name path, strips it, and returns the remaining non-empty paths (or an InternalError).
+ TColumnAccessPaths sub_access_paths = access_paths; // edit a copy; the input is a const reference
+ for (auto it = sub_access_paths.name_access_paths.begin();
+ it != sub_access_paths.name_access_paths.end();) { // no increment here: the loop body either advances or erases
+ TColumnNameAccessPath& name_path = *it;
+ if (name_path.path.empty()) { // there is no first component to validate
+ return ResultError( // NOTE(review): the message below says struct column, yet the map and array iterators call this helper as well
+ Status::InternalError("Invalid access path for struct
column: path is empty"));
+ }
+
+ if (name_path.path[0] != _column_name) { // first component is not this column name; check the type-specific alternatives
+ if (typeid_cast<ArrayFileColumnIterator*>(this) != nullptr) { // array iterator: the only other accepted component is *
+ if (name_path.path[0] != "*") {
+ return ResultError(Status::InternalError(
+ R"(Invalid access path for array column: expected
name "{}", got "{}")",
+ _column_name, name_path.path[0]));
+ }
+ } else if (typeid_cast<MapFileColumnIterator*>(this) != nullptr) { // map iterator: KEYS, VALUES and * are also accepted
+ if (name_path.path[0] != "KEYS" && name_path.path[0] !=
"VALUES" &&
+ name_path.path[0] != "*") {
+ return ResultError(Status::InternalError(
+ R"(Invalid access path for map column: expected
name "{}", got "{}")",
+ _column_name, name_path.path[0]));
+ }
+ } else { // every other iterator type accepts nothing but its own column name
+ return ResultError(Status::InternalError(
+ R"(Invalid access path for column: expected name "{}",
got "{}")",
+ _column_name, name_path.path[0]));
+ }
+ }
+
+ name_path.path.erase(name_path.path.begin()); // drop the component that was just validated
+ if (!name_path.path.empty()) { // components remain: keep the shortened path
+ ++it;
+ } else { // path fully consumed: remove the entry from the result
+ it = sub_access_paths.name_access_paths.erase(it);
+ }
+ }
+ return sub_access_paths;
+}
+
///====================== MapFileColumnIterator
============================////
MapFileColumnIterator::MapFileColumnIterator(std::shared_ptr<ColumnReader>
reader,
ColumnIteratorUPtr null_iterator,
@@ -1061,23 +1104,8 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
<< " to READING_FOR_PREDICATE";
}
- auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) ->
TColumnAccessPaths {
- TColumnAccessPaths sub_access_paths = access_paths;
- for (auto it = sub_access_paths.name_access_paths.begin();
- it != sub_access_paths.name_access_paths.end();) {
- TColumnNameAccessPath& name_path = *it;
- if (name_path.path.size() > 1) {
- name_path.path.erase(name_path.path.begin());
- ++it;
- } else {
- it = sub_access_paths.name_access_paths.erase(it);
- }
- }
- return sub_access_paths;
- };
-
- auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
- auto sub_predicate_access_paths =
get_sub_access_paths(predicate_access_paths);
+ auto sub_all_access_paths =
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+ auto sub_predicate_access_paths =
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
if (sub_all_access_paths.name_access_paths.empty()) {
return Status::OK();
@@ -1243,24 +1271,8 @@ Status StructFileColumnIterator::set_access_paths(
LOG(INFO) << "Struct column iterator set sub-column " << _column_name
<< " to READING_FOR_PREDICATE";
}
-
- auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) ->
TColumnAccessPaths {
- TColumnAccessPaths sub_access_paths = access_paths;
- for (auto it = sub_access_paths.name_access_paths.begin();
- it != sub_access_paths.name_access_paths.end();) {
- TColumnNameAccessPath& name_path = *it;
- if (name_path.path.size() > 1) {
- name_path.path.erase(name_path.path.begin());
- ++it;
- } else {
- it = sub_access_paths.name_access_paths.erase(it);
- }
- }
- return sub_access_paths;
- };
-
- auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
- auto sub_predicate_access_paths =
get_sub_access_paths(predicate_access_paths);
+ auto sub_all_access_paths =
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+ auto sub_predicate_access_paths =
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
const auto no_sub_column_to_skip =
sub_all_access_paths.name_access_paths.empty();
const auto no_predicate_sub_column =
sub_predicate_access_paths.name_access_paths.empty();
@@ -1488,23 +1500,8 @@ Status ArrayFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_a
<< " to READING_FOR_PREDICATE";
}
- auto get_sub_access_paths = [&](const TColumnAccessPaths& access_paths) ->
TColumnAccessPaths {
- TColumnAccessPaths sub_access_paths = access_paths;
- for (auto it = sub_access_paths.name_access_paths.begin();
- it != sub_access_paths.name_access_paths.end();) {
- TColumnNameAccessPath& name_path = *it;
- if (name_path.path.size() > 1) {
- name_path.path.erase(name_path.path.begin());
- ++it;
- } else {
- it = sub_access_paths.name_access_paths.erase(it);
- }
- }
- return sub_access_paths;
- };
-
- auto sub_all_access_paths = get_sub_access_paths(all_access_paths);
- auto sub_predicate_access_paths =
get_sub_access_paths(predicate_access_paths);
+ auto sub_all_access_paths =
DORIS_TRY(_get_sub_access_paths(all_access_paths));
+ auto sub_predicate_access_paths =
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
const auto no_sub_column_to_skip =
sub_all_access_paths.name_access_paths.empty();
const auto no_predicate_sub_column =
sub_predicate_access_paths.name_access_paths.empty();
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index 062f8625678..d4ae7f86ea0 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -382,6 +382,7 @@ public:
void set_reading_flag(ReadingFlag flag) { _reading_flag = flag; }
protected:
+ Result<TColumnAccessPaths> _get_sub_access_paths(const TColumnAccessPaths&
access_paths);
ColumnIteratorOptions _opts;
ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING};
diff --git a/be/test/olap/rowset/segment_v2/column_reader_test.cpp
b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
new file mode 100644
index 00000000000..3523413d9b4
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "olap/rowset/segment_v2/column_reader.h"
+
+#include <gen_cpp/Descriptors_types.h>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include "agent/be_exec_version_manager.h"
+#include "common/config.h"
+#include "gen_cpp/olap_file.pb.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "io/fs/file_reader.h"
+#include "mock/mock_segment.h"
+#include "olap/rowset/segment_v2/column_reader_cache.h"
+#include "olap/rowset/segment_v2/segment.h"
+#include "olap/rowset/segment_v2/variant/variant_column_reader.h"
+#include "olap/tablet_schema.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris::segment_v2 {
+class ColumnReaderTest : public ::testing::Test { // Fixture for the access-path tests in this file; it holds no shared state.
+protected:
+ void SetUp() override {} // nothing to prepare
+ void TearDown() override {} // nothing to clean up
+};
+
+TEST_F(ColumnReaderTest, StructAccessPaths) { // Checks how StructFileColumnIterator::set_access_paths validates paths and which reading flags result.
+ auto create_struct_iterator = []() { // struct iterator with children sub_col_1 and sub_col_2; set_column_name is not called on the struct itself here
+ auto null_reader = std::make_shared<ColumnReader>();
+ auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+ std::vector<ColumnIteratorUPtr> sub_column_iterators;
+ auto sub_reader1 = std::make_shared<ColumnReader>();
+ auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+ sub_iterator1->set_column_name("sub_col_1");
+ auto sub_reader2 = std::make_shared<ColumnReader>();
+ auto sub_iterator2 = std::make_unique<FileColumnIterator>(sub_reader2);
+ sub_iterator2->set_column_name("sub_col_2");
+
+ sub_column_iterators.emplace_back(std::move(sub_iterator1));
+ sub_column_iterators.emplace_back(std::move(sub_iterator2));
+ auto iterator =
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+
std::move(null_iterator),
+
std::move(sub_column_iterators));
+ return iterator;
+ };
+
+ auto iterator = create_struct_iterator();
+ auto st = iterator->set_access_paths(TColumnAccessPaths {},
TColumnAccessPaths {});
+
+ ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); // no access paths at all is accepted
+ ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::NORMAL_READING);
+
+ TColumnAccessPaths all_access_paths;
+ all_access_paths.name_access_paths.emplace_back(); // one entry whose path is still empty
+
+ TColumnAccessPaths predicate_access_paths;
+ predicate_access_paths.name_access_paths.emplace_back(); // one entry whose path is still empty
+
+ st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+ // an access-path entry whose path is empty must be rejected
+ ASSERT_FALSE(st.ok());
+
+ // Request only sub_col_1, both as a read path and as a predicate path.
+ // Once these paths are accepted further down, sub_col_2 is expected to be SKIP_READING.
+ all_access_paths.name_access_paths[0].path = {"self", "sub_col_1"};
+
+ predicate_access_paths.name_access_paths[0].path = {"self", "sub_col_1"};
+
+ st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+ // the struct has not been named self yet, so the leading path component is rejected
+ ASSERT_FALSE(st.ok());
+
+ iterator->set_column_name("self");
+ // the column is now named self, so the same paths should be accepted
+ st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+ ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+ ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+
+ ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
+ ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+ ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
+ ColumnIterator::ReadingFlag::SKIP_READING);
+
+ // Read every sub column: the all-paths entry names only the struct, while the predicate path still targets sub_col_1
+ all_access_paths.name_access_paths[0].path = {"self"};
+ iterator = create_struct_iterator(); // fresh iterator, so flags set by the previous call do not carry over
+ iterator->set_column_name("self");
+ st = iterator->set_access_paths(all_access_paths, predicate_access_paths);
+
+ ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+ ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+
+ ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
+ ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
+ ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
+ ColumnIterator::ReadingFlag::NORMAL_READING);
+}
+
+TEST_F(ColumnReaderTest, MultiAccessPaths) { // Checks that one access path through struct, array and map levels sets the expected reading flag at each level.
+ auto create_struct_iterator = []() { // struct iterator with two plain children, sub_col_1 and sub_col_2
+ auto null_reader = std::make_shared<ColumnReader>();
+ auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+ std::vector<ColumnIteratorUPtr> sub_column_iterators;
+ auto sub_reader1 = std::make_shared<ColumnReader>();
+ auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+ sub_iterator1->set_column_name("sub_col_1");
+ auto sub_reader2 = std::make_shared<ColumnReader>();
+ auto sub_iterator2 = std::make_unique<FileColumnIterator>(sub_reader2);
+ sub_iterator2->set_column_name("sub_col_2");
+
+ sub_column_iterators.emplace_back(std::move(sub_iterator1));
+ sub_column_iterators.emplace_back(std::move(sub_iterator2));
+ auto iterator =
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+
std::move(null_iterator),
+
std::move(sub_column_iterators));
+ return iterator;
+ };
+
+ auto create_struct_iterator2 = [](ColumnIteratorUPtr&& nested_iterator) { // struct iterator whose second child is the iterator passed in
+ auto null_reader = std::make_shared<ColumnReader>();
+ auto null_iterator = std::make_unique<FileColumnIterator>(null_reader);
+
+ std::vector<ColumnIteratorUPtr> sub_column_iterators;
+ auto sub_reader1 = std::make_shared<ColumnReader>();
+ auto sub_iterator1 = std::make_unique<FileColumnIterator>(sub_reader1);
+ sub_iterator1->set_column_name("sub_col_1");
+
+ sub_column_iterators.emplace_back(std::move(sub_iterator1));
+ sub_column_iterators.emplace_back(std::move(nested_iterator));
+ auto iterator =
std::make_unique<StructFileColumnIterator>(std::make_shared<ColumnReader>(),
+
std::move(null_iterator),
+
std::move(sub_column_iterators));
+ return iterator;
+ };
+
+ auto struct_iterator = create_struct_iterator(); // innermost struct; it is handed to the map iterator as its last argument
+ struct_iterator->set_column_name("struct");
+
+ auto map_iterator = std::make_unique<MapFileColumnIterator>(
+ std::make_shared<ColumnReader>(),
+
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()), // null
iterator
+ std::make_unique<OffsetFileColumnIterator>(
+
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>())),
+
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()),
+ std::move(struct_iterator));
+
+ auto array_iterator = std::make_unique<ArrayFileColumnIterator>(
+ std::make_shared<ColumnReader>(),
+ std::make_unique<OffsetFileColumnIterator>(
+
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>())),
+ std::move(map_iterator),
+
std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()));
+
+ // Layout modelled by the iterators in this test:
+ // struct<
+ // sub_col_1,
+ // sub_col_2: array<
+ // map<
+ // key,
+ // value: struct<
+ // sub_col_1,
+ // sub_col_2
+ // >
+ // >
+ // >
+ // >
+ array_iterator->set_column_name("sub_col_2"); // the array takes the place of sub_col_2 in the outer struct
+ auto iterator = create_struct_iterator2(std::move(array_iterator));
+ TColumnAccessPaths all_access_paths;
+ all_access_paths.name_access_paths.emplace_back();
+
+ // All access paths:
+ // self.sub_col_2.*.KEYS
+ // Predicate access paths: none.
+ all_access_paths.name_access_paths[0].path = {"self", "sub_col_2", "*",
"KEYS"};
+
+ TColumnAccessPaths predicate_access_paths;
+
+ iterator->set_column_name("self"); // must match the first path component
+ auto st = iterator->set_access_paths(all_access_paths,
predicate_access_paths);
+
+ ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
+ ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::NORMAL_READING);
+
+ ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag, // sub_col_1 is on no requested path
+ ColumnIterator::ReadingFlag::SKIP_READING);
+ ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag, // sub_col_2 is on the requested path
+ ColumnIterator::ReadingFlag::NORMAL_READING);
+
+ auto* array_iter =
+
static_cast<ArrayFileColumnIterator*>(iterator->_sub_column_iterators[1].get());
+ ASSERT_EQ(array_iter->_item_iterator->_reading_flag, // the array item (the map) is still read
+ ColumnIterator::ReadingFlag::NORMAL_READING);
+
+ auto* map_iter =
static_cast<MapFileColumnIterator*>(array_iter->_item_iterator.get());
+ ASSERT_EQ(map_iter->_key_iterator->_reading_flag,
ColumnIterator::ReadingFlag::NORMAL_READING);
+ ASSERT_EQ(map_iter->_val_iterator->_reading_flag,
ColumnIterator::ReadingFlag::SKIP_READING);
+}
+} // namespace doris::segment_v2
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]