This is an automated email from the ASF dual-hosted git repository. mrhhsg pushed a commit to branch cherry-pick-nested_column_prune_4.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8906295b374899763e3972ab21b35ca33de6f635 Author: Jerry Hu <[email protected]> AuthorDate: Thu Dec 18 11:06:52 2025 +0800 [opt](olap) Optimize reading by rowids of Map Column (#59043) ### What problem does this PR solve? Read as many consecutive rows as possible. Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into --> --- be/src/olap/rowset/segment_v2/column_reader.cpp | 67 +++++++++++++++++++++---- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index ca2f0e47705..27efea4fe08 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1166,6 +1166,7 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t auto& next_starts_data = assert_cast<vectorized::ColumnOffset64&>(*next_starts_col).get_data(); std::vector<size_t> sizes(count, 0); size_t acc = base; + const auto original_size = offsets.get_data().back(); offsets.get_data().reserve(offsets.get_data().size() + count); for (size_t i = 0; i < count; ++i) { size_t sz = static_cast<size_t>(next_starts_data[i] - starts_data[i]); @@ -1181,21 +1182,65 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t auto keys_ptr = column_map->get_keys().assume_mutable(); auto vals_ptr = column_map->get_values().assume_mutable(); - for (size_t i = 0; i < count; ++i) { + size_t this_run = sizes[0]; + auto start_idx = starts_data[0]; + auto last_idx = starts_data[0] + this_run; + for (size_t i = 1; i < count; ++i) { size_t sz = sizes[i]; if (sz == 0) { continue; } - ordinal_t start = static_cast<ordinal_t>(starts_data[i]); - RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start)); - RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start)); - size_t n = sz; - bool dummy_has_null = false; - RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); - DCHECK(n == sz); - n = sz; - RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); - DCHECK(n == sz); + auto start = static_cast<ordinal_t>(starts_data[i]); + if (start != last_idx) { + size_t n = this_run; + bool dummy_has_null = false; + + if (this_run != 0) { + if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); + } + + if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); + } + } + start_idx = start; + this_run = sz; + last_idx = start + sz; + continue; + } + + this_run += sz; + last_idx += sz; + } + + size_t n = this_run; + const size_t total_count = offsets.get_data().back() - original_size; + bool dummy_has_null = false; + if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { + if (this_run != 0) { + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); + } + } else { + keys_ptr->insert_many_defaults(total_count); + } + + if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { + if (this_run != 0) { + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); + } + } else { + vals_ptr->insert_many_defaults(total_count); } return Status::OK(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
