This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch cherry-pick-nested_column_prune_4.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8906295b374899763e3972ab21b35ca33de6f635
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Dec 18 11:06:52 2025 +0800

    [opt](olap) Optimize reading by rowids of Map Column (#59043)
    
    ### What problem does this PR solve?
    
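    Read as many consecutive rows as possible: when reading a Map column by
    rowids, rows whose key/value ordinal ranges are contiguous are merged into
    a single run, so each run is fetched with one seek and one next_batch call
    instead of one per row. Key/value iterators flagged SKIP_READING are not
    read at all and are filled with default values instead.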
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here, e.g.
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/olap/rowset/segment_v2/column_reader.cpp | 67 +++++++++++++++++++++----
 1 file changed, 56 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index ca2f0e47705..27efea4fe08 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1166,6 +1166,7 @@ Status MapFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const size_t
     auto& next_starts_data = 
assert_cast<vectorized::ColumnOffset64&>(*next_starts_col).get_data();
     std::vector<size_t> sizes(count, 0);
     size_t acc = base;
+    const auto original_size = offsets.get_data().back();
     offsets.get_data().reserve(offsets.get_data().size() + count);
     for (size_t i = 0; i < count; ++i) {
         size_t sz = static_cast<size_t>(next_starts_data[i] - starts_data[i]);
@@ -1181,21 +1182,65 @@ Status MapFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const size_t
     auto keys_ptr = column_map->get_keys().assume_mutable();
     auto vals_ptr = column_map->get_values().assume_mutable();
 
-    for (size_t i = 0; i < count; ++i) {
+    size_t this_run = sizes[0];
+    auto start_idx = starts_data[0];
+    auto last_idx = starts_data[0] + this_run;
+    for (size_t i = 1; i < count; ++i) {
         size_t sz = sizes[i];
         if (sz == 0) {
             continue;
         }
-        ordinal_t start = static_cast<ordinal_t>(starts_data[i]);
-        RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start));
-        RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start));
-        size_t n = sz;
-        bool dummy_has_null = false;
-        RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, 
&dummy_has_null));
-        DCHECK(n == sz);
-        n = sz;
-        RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, 
&dummy_has_null));
-        DCHECK(n == sz);
+        auto start = static_cast<ordinal_t>(starts_data[i]);
+        if (start != last_idx) {
+            size_t n = this_run;
+            bool dummy_has_null = false;
+
+            if (this_run != 0) {
+                if (_key_iterator->reading_flag() != 
ReadingFlag::SKIP_READING) {
+                    RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx));
+                    RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, 
&dummy_has_null));
+                    DCHECK(n == this_run);
+                }
+
+                if (_val_iterator->reading_flag() != 
ReadingFlag::SKIP_READING) {
+                    n = this_run;
+                    RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx));
+                    RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, 
&dummy_has_null));
+                    DCHECK(n == this_run);
+                }
+            }
+            start_idx = start;
+            this_run = sz;
+            last_idx = start + sz;
+            continue;
+        }
+
+        this_run += sz;
+        last_idx += sz;
+    }
+
+    size_t n = this_run;
+    const size_t total_count = offsets.get_data().back() - original_size;
+    bool dummy_has_null = false;
+    if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) {
+        if (this_run != 0) {
+            RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx));
+            RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, 
&dummy_has_null));
+            DCHECK(n == this_run);
+        }
+    } else {
+        keys_ptr->insert_many_defaults(total_count);
+    }
+
+    if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) {
+        if (this_run != 0) {
+            n = this_run;
+            RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx));
+            RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, 
&dummy_has_null));
+            DCHECK(n == this_run);
+        }
+    } else {
+        vals_ptr->insert_many_defaults(total_count);
     }
 
     return Status::OK();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to