mrhhsg commented on code in PR #59263:
URL: https://github.com/apache/doris/pull/59263#discussion_r3316240300
##########
be/cmake/thirdparty.cmake:
##########
@@ -106,8 +106,10 @@ add_thirdparty(zstd LIB64)
add_thirdparty(arrow LIB64)
add_thirdparty(arrow_flight LIB64)
add_thirdparty(arrow_flight_sql LIB64)
-add_thirdparty(arrow_dataset LIB64)
Review Comment:
已移除该不相关改动,thirdparty.cmake 已恢复到 master 对应状态。
##########
.github/workflows/be-ut-mac.yml:
##########
@@ -29,7 +29,7 @@ concurrency:
jobs:
Review Comment:
已移除该不相关 workflow 改动,be-ut-mac.yml 已恢复到 master 对应状态。
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -882,6 +901,37 @@ Result<TColumnAccessPaths>
ColumnIterator::_get_sub_access_paths(
return sub_access_paths;
}
+Result<TColumnAccessPaths> ColumnIterator::_process_sub_access_paths(
+ const TColumnAccessPaths& access_paths, const bool is_predicate) {
+ TColumnAccessPaths sub_access_paths = access_paths;
+ for (auto it = sub_access_paths.begin(); it != sub_access_paths.end();) {
+ TColumnAccessPath& name_path = *it;
+ if (name_path.data_access_path.path.empty()) {
+ return ResultError(
+ Status::InternalError("Invalid access path for struct
column: path is empty"));
Review Comment:
已改成不绑定 struct 的通用错误信息:`Invalid access path for column '<name>': path is empty`,避免 map/array/struct 共用路径时报错不准确。
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -882,6 +901,37 @@ Result<TColumnAccessPaths>
ColumnIterator::_get_sub_access_paths(
return sub_access_paths;
}
+Result<TColumnAccessPaths> ColumnIterator::_process_sub_access_paths(
Review Comment:
已删除重复的 _process_sub_access_paths,将 predicate/non-predicate 逻辑合并到
_get_sub_access_paths(access_paths, is_predicate) 里复用。
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -1081,39 +1161,74 @@ Status MapFileColumnIterator::read_by_rowids(const
rowid_t* rowids, const size_t
if (count == 0) {
return Status::OK();
}
+
// resolve ColumnMap and nullable wrapper
auto& column_map = assert_cast<ColumnMap&, TypeCheckOnRelease::DISABLE>(
dst->is_nullable() ?
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
- auto offsets_ptr =
IColumn::mutate(std::move(column_map.get_offsets_ptr()));
- Defer defer_offsets {[&] { column_map.get_offsets_ptr() =
std::move(offsets_ptr); }};
+ const bool read_meta_columns = need_to_read_meta_columns();
+ MutableColumnPtr offsets_ptr;
+ if (read_meta_columns) {
+ offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr()));
+ } else {
+ const auto base_offset =
+ column_map.get_offsets().empty() ? 0 :
column_map.get_offsets().back();
+ offsets_ptr = ColumnMap::COffsets::create();
+ assert_cast<ColumnOffset64&, TypeCheckOnRelease::DISABLE>(*offsets_ptr)
+ .insert_value(base_offset);
+ }
+ Defer defer_offsets {[&] {
+ if (read_meta_columns) {
+ column_map.get_offsets_ptr() = std::move(offsets_ptr);
+ }
+ }};
auto& offsets = static_cast<ColumnArray::ColumnOffsets&>(*offsets_ptr);
size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back();
// 1. bulk read null-map if nullable
std::vector<uint8_t> null_mask; // 0: not null, 1: null
- if (_map_reader->is_nullable()) {
- // For nullable map columns, the destination column must also be
nullable.
+ if (read_meta_columns) {
+ if (_map_reader->is_nullable()) {
+ // For nullable map columns, the destination column must also be
nullable.
+ if (UNLIKELY(!dst->is_nullable())) {
+ return Status::InternalError(
+ "unexpected non-nullable destination column for
nullable map reader");
+ }
+ MutableColumnPtr null_map_ptr =
+
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
+ size_t null_before = null_map_ptr->size();
+ RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_ptr));
+ // extract a light-weight view to decide element reads
+ auto& null_map_col = assert_cast<ColumnUInt8&>(*null_map_ptr);
+ null_mask.reserve(count);
+ for (size_t i = 0; i < count; ++i) {
+ null_mask.push_back(null_map_col.get_element(null_before + i));
+ }
+ } else if (dst->is_nullable()) {
+ // in not-null to null linked-schemachange mode,
+ // actually we do not change dat data include meta in footer,
+ // so may dst from changed meta which is nullable but old data is
not nullable,
+ // if so, we should set null_map to all null by default
+ MutableColumnPtr null_map_ptr =
+
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
+ auto& null_map = assert_cast<ColumnUInt8&>(*null_map_ptr);
+ null_map.insert_many_vals(0, count);
+ }
+ } else if (_map_reader->is_nullable()) {
+ // In lazy mode the parent null map has already been materialized
during predicate read.
+ // Read a temporary null map here only to compute child element ranges
correctly.
if (UNLIKELY(!dst->is_nullable())) {
return Status::InternalError(
"unexpected non-nullable destination column for nullable
map reader");
}
- auto null_map_ptr =
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
- size_t null_before = null_map_ptr->size();
- auto* null_map_col = null_map_ptr.get();
- MutableColumnPtr null_map_column = std::move(null_map_ptr);
- RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_column));
- // extract a light-weight view to decide element reads
+
+ MutableColumnPtr null_map_ptr = ColumnVector<TYPE_BOOLEAN>::create();
+ RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_ptr));
+
+ auto& null_map_col = assert_cast<ColumnUInt8&>(*null_map_ptr);
null_mask.reserve(count);
for (size_t i = 0; i < count; ++i) {
Review Comment:
已改成通过 get_data().data() + assign 批量填充 null_mask,两处 nullable map 读取路径都已处理。
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -855,6 +855,25 @@ Status
ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator,
return Status::OK();
}
+void ColumnIterator::_convert_to_place_holder_column(MutableColumnPtr& dst,
size_t count) {
+ if (_reading_flag != ReadingFlag::SKIP_READING && _reading_mode ==
ReadingMode::PREDICATE) {
+ _has_place_holder_column = true;
+ }
Review Comment:
已改成 LAZY 早返回 + else if 的结构;保留 predicate placeholder 默认值写入,以保持 predicate phase 的 block 行数一致。
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]