This is an automated email from the ASF dual-hosted git repository.

zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 632f79197c2 [Exec](be) Support offset prune column and null column in 
BE (#61888)
632f79197c2 is described below

commit 632f79197c2a05954a4a7efb1829f38aa5a6a849
Author: HappenLee <[email protected]>
AuthorDate: Fri Apr 3 11:52:21 2026 +0800

    [Exec](be) Support offset prune column and null column in BE (#61888)
    
    ### What problem does this PR solve?
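    Problem Summary: This PR includes two main changes:
    1. Add offset-only read optimization support for string, array, and map
       types in column reader
    2. Add null-map-only read mode for array, map, and struct column
       iterators, which reads only the null map and fills the nested column
       with defaults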
    
    ### Release note
    - [Storage] Add offset-only read optimization for complex types (string,
    array, map) to improve read performance
    ### Check List
    - Test: BE unit tests passed
    - Behavior changed: No (materialization fix prevents silent failures,
    now returns error explicitly)
    - Does this need documentation: No
    
    ---------
    
    Co-authored-by: englefly <[email protected]>
---
 be/src/core/column/column.h                    |   9 +
 be/src/core/column/column_nullable.h           |   8 +
 be/src/core/column/column_string.h             |  23 ++
 be/src/storage/segment/binary_dict_page.cpp    |  52 ++-
 be/src/storage/segment/binary_plain_page.h     |  39 ++-
 be/src/storage/segment/column_reader.cpp       | 429 +++++++++++++++++++++++--
 be/src/storage/segment/column_reader.h         |  65 +++-
 be/src/storage/segment/options.h               |   1 +
 be/test/storage/segment/column_reader_test.cpp |  40 ++-
 9 files changed, 628 insertions(+), 38 deletions(-)

diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h
index 080be3b776e..d20ecc9d820 100644
--- a/be/src/core/column/column.h
+++ b/be/src/core/column/column.h
@@ -266,6 +266,15 @@ public:
                 "Method insert_many_continuous_binary_data is not supported 
for " + get_name());
     }
 
+    /// Insert `num` string entries with real length information but no actual
+    /// character data. Used by OFFSET_ONLY reading mode where actual string
+    /// content is not needed but length information must be preserved.
+    virtual void insert_offsets_from_lengths(const uint32_t* lengths, size_t 
num) {
+        throw doris::Exception(
+                ErrorCode::NOT_IMPLEMENTED_ERROR,
+                "Method insert_offsets_from_lengths is not supported for " + 
get_name());
+    }
+
     virtual void insert_many_strings(const StringRef* strings, size_t num) {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
                                "Method insert_many_strings is not supported 
for " + get_name());
diff --git a/be/src/core/column/column_nullable.h 
b/be/src/core/column/column_nullable.h
index 51f88dc7542..7e2406b8e92 100644
--- a/be/src/core/column/column_nullable.h
+++ b/be/src/core/column/column_nullable.h
@@ -185,6 +185,14 @@ public:
         get_nested_column().insert_many_continuous_binary_data(data, offsets, 
num);
     }
 
+    void insert_offsets_from_lengths(const uint32_t* lengths, size_t num) 
override {
+        if (UNLIKELY(num == 0)) {
+            return;
+        }
+        push_false_to_nullmap(num);
+        get_nested_column().insert_offsets_from_lengths(lengths, num);
+    }
+
     // Default value in `ColumnNullable` is null
     void insert_default() override {
         get_nested_column().insert_default();
diff --git a/be/src/core/column/column_string.h 
b/be/src/core/column/column_string.h
index 1f35b8b602b..27804a649e4 100644
--- a/be/src/core/column/column_string.h
+++ b/be/src/core/column/column_string.h
@@ -276,6 +276,29 @@ public:
         sanity_check_simple();
     }
 
+    // Insert `num` string entries with real length information but no actual
+    // character data. The `lengths` array provides the byte length of each
+    // string. Offsets are built with correct cumulative sizes so that
+    // size_at(i) returns the true string length. The chars buffer is extended
+    // with zero-filled padding to maintain the invariant chars.size() == 
offsets.back().
+    // Used by OFFSET_ONLY reading mode where actual string content is not 
needed
+    // but length information must be preserved (e.g., for length() function).
+    void insert_offsets_from_lengths(const uint32_t* lengths, size_t num) 
override {
+        if (UNLIKELY(num == 0)) {
+            return;
+        }
+        const auto old_rows = offsets.size();
+        // Build cumulative offsets from lengths
+        offsets.resize(old_rows + num);
+        auto* offsets_ptr = &offsets[old_rows];
+        size_t running_offset = offsets[old_rows - 1];
+        for (size_t i = 0; i < num; ++i) {
+            running_offset += lengths[i];
+            offsets_ptr[i] = static_cast<T>(running_offset);
+        }
+        chars.resize(offsets[old_rows + num - 1]);
+    }
+
     void insert_many_strings(const StringRef* strings, size_t num) override {
         size_t new_size = 0;
         for (size_t i = 0; i < num; i++) {
diff --git a/be/src/storage/segment/binary_dict_page.cpp 
b/be/src/storage/segment/binary_dict_page.cpp
index 04147b00056..80f1fac916b 100644
--- a/be/src/storage/segment/binary_dict_page.cpp
+++ b/be/src/storage/segment/binary_dict_page.cpp
@@ -29,6 +29,7 @@
 #include "common/logging.h"
 #include "common/status.h"
 #include "core/column/column.h"
+#include "core/column/column_string.h"
 #include "storage/segment/binary_plain_page_v2.h"
 #include "storage/segment/bitshuffle_page.h"
 #include "storage/segment/encoding_info.h"
@@ -318,11 +319,28 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, 
MutableColumnPtr& dst) {
                                                         
_bit_shuffle_ptr->_cur_index));
     *n = max_fetch;
 
-    const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
-    size_t start_index = _bit_shuffle_ptr->_cur_index;
+    if (_options.only_read_offsets) {
+        // OFFSET_ONLY mode: resolve dict codes to get real string lengths
+        // without copying actual char data. This allows length() to work.
+        const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
+        size_t start_index = _bit_shuffle_ptr->_cur_index;
+        // Reuse _buffer (int32_t vector) to store uint32_t lengths.
+        // int32_t and uint32_t have the same size/alignment, and string
+        // lengths are always non-negative, so the bit patterns are identical.
+        _buffer.resize(max_fetch);
+        for (size_t i = 0; i < max_fetch; ++i) {
+            int32_t codeword = data_array[start_index + i];
+            _buffer[i] = static_cast<int32_t>(_dict_word_info[codeword].size);
+        }
+        dst->insert_offsets_from_lengths(reinterpret_cast<const 
uint32_t*>(_buffer.data()),
+                                         max_fetch);
+    } else {
+        const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
+        size_t start_index = _bit_shuffle_ptr->_cur_index;
 
-    dst->insert_many_dict_data(data_array, start_index, _dict_word_info, 
max_fetch,
-                               _num_dict_items);
+        dst->insert_many_dict_data(data_array, start_index, _dict_word_info, 
max_fetch,
+                                   _num_dict_items);
+    }
 
     _bit_shuffle_ptr->_cur_index += max_fetch;
 
@@ -343,8 +361,32 @@ Status BinaryDictPageDecoder::read_by_rowids(const 
rowid_t* rowids, ordinal_t pa
         return Status::OK();
     }
 
-    const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
     auto total = *n;
+
+    if (_options.only_read_offsets) {
+        // OFFSET_ONLY mode: resolve dict codes to get real string lengths
+        // without copying actual char data. This allows length() to work 
correctly.
+        const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
+        size_t read_count = 0;
+        _buffer.resize(total);
+        for (size_t i = 0; i < total; ++i) {
+            ordinal_t ord = rowids[i] - page_first_ordinal;
+            if (ord >= _bit_shuffle_ptr->_num_elements) [[unlikely]] {
+                break;
+            }
+            int32_t codeword = data_array[ord];
+            _buffer[read_count] = 
static_cast<int32_t>(_dict_word_info[codeword].size);
+            read_count++;
+        }
+        if (read_count > 0) {
+            dst->insert_offsets_from_lengths(reinterpret_cast<const 
uint32_t*>(_buffer.data()),
+                                             read_count);
+        }
+        *n = read_count;
+        return Status::OK();
+    }
+
+    const auto* data_array = reinterpret_cast<const 
int32_t*>(_bit_shuffle_ptr->get_data(0));
     size_t read_count = 0;
     _buffer.resize(total);
     for (size_t i = 0; i < total; ++i) {
diff --git a/be/src/storage/segment/binary_plain_page.h 
b/be/src/storage/segment/binary_plain_page.h
index 4ed2158c82e..6734961a66c 100644
--- a/be/src/storage/segment/binary_plain_page.h
+++ b/be/src/storage/segment/binary_plain_page.h
@@ -30,7 +30,6 @@
 
 #include "common/logging.h"
 #include "core/column/column_complex.h"
-#include "core/column/column_nullable.h"
 #include "storage/olap_common.h"
 #include "storage/segment/options.h"
 #include "storage/segment/page_builder.h"
@@ -244,6 +243,21 @@ public:
         }
         const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - 
_cur_idx));
 
+        if (_options.only_read_offsets) {
+            // OFFSET_ONLY mode: read string lengths from page offset trailer
+            // without copying actual char data. This allows length() to work.
+            _offsets.resize(max_fetch);
+            for (size_t i = 0; i < max_fetch; ++i) {
+                uint32_t str_start = offset(_cur_idx + i);
+                uint32_t str_end = offset(_cur_idx + i + 1);
+                _offsets[i] = str_end - str_start;
+            }
+            dst->insert_offsets_from_lengths(_offsets.data(), max_fetch);
+            _cur_idx += max_fetch;
+            *n = max_fetch;
+            return Status::OK();
+        }
+
         uint32_t last_offset = guarded_offset(_cur_idx);
         _offsets.resize(max_fetch + 1);
         _offsets[0] = last_offset;
@@ -279,6 +293,29 @@ public:
         }
 
         auto total = *n;
+
+        if (_options.only_read_offsets) {
+            // OFFSET_ONLY mode: read string lengths from page offset trailer
+            // without copying actual char data. This allows length() to work.
+            size_t read_count = 0;
+            _offsets.resize(total);
+            for (size_t i = 0; i < total; ++i) {
+                ordinal_t ord = rowids[i] - page_first_ordinal;
+                if (UNLIKELY(ord >= _num_elems)) {
+                    break;
+                }
+                uint32_t str_start = offset(ord);
+                uint32_t str_end = offset(ord + 1);
+                _offsets[read_count] = str_end - str_start;
+                read_count++;
+            }
+            if (read_count > 0) {
+                dst->insert_offsets_from_lengths(_offsets.data(), read_count);
+            }
+            *n = read_count;
+            return Status::OK();
+        }
+
         size_t read_count = 0;
         _binary_data.resize(total);
         for (size_t i = 0; i < total; ++i) {
diff --git a/be/src/storage/segment/column_reader.cpp 
b/be/src/storage/segment/column_reader.cpp
index 13e277d5660..cf9960592e2 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -728,7 +728,11 @@ Status ColumnReader::new_iterator(ColumnIteratorUPtr* 
iterator, const TabletColu
         return Status::OK();
     }
     if (is_scalar_type(_meta_type)) {
-        *iterator = std::make_unique<FileColumnIterator>(shared_from_this());
+        if (is_string_type(_meta_type)) {
+            *iterator = 
std::make_unique<StringFileColumnIterator>(shared_from_this());
+        } else {
+            *iterator = 
std::make_unique<FileColumnIterator>(shared_from_this());
+        }
         (*iterator)->set_column_name(tablet_column ? tablet_column->name() : 
"");
         return Status::OK();
     } else {
@@ -910,10 +914,22 @@ Status MapFileColumnIterator::seek_to_ordinal(ordinal_t 
ord) {
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // In NULL_MAP_ONLY mode, only seek the null iterator; skip 
offset/key/val iterators
+        if (_map_reader->is_nullable() && _null_iterator) {
+            RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
+        }
+        return Status::OK();
+    }
+
     if (_map_reader->is_nullable()) {
         RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
     }
     RETURN_IF_ERROR(_offsets_iterator->seek_to_ordinal(ord));
+    if (read_offset_only()) {
+        // In OFFSET_ONLY mode, key/value iterators are SKIP_READING, no need 
to seek them
+        return Status::OK();
+    }
     // here to use offset info
     ordinal_t offset = 0;
     RETURN_IF_ERROR(_offsets_iterator->_peek_one_offset(&offset));
@@ -952,6 +968,30 @@ Status MapFileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, bool*
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // NULL_MAP_ONLY mode: read null map, fill nested ColumnMap with empty 
defaults
+        DORIS_CHECK(dst->is_nullable());
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        auto null_map_ptr = nullable_col.get_null_map_column_ptr();
+        size_t num_read = *n;
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            // schema-change: column became nullable but old segment has no 
null data
+            auto& null_map = assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
+        DCHECK(num_read == *n);
+        // fill nested ColumnMap with empty (zero-element) maps
+        auto& column_map = assert_cast<ColumnMap&, 
TypeCheckOnRelease::DISABLE>(
+                nullable_col.get_nested_column());
+        column_map.insert_many_defaults(num_read);
+        *has_null = true;
+        return Status::OK();
+    }
+
     auto& column_map = assert_cast<ColumnMap&, TypeCheckOnRelease::DISABLE>(
             dst->is_nullable() ? 
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
     auto column_offsets_ptr = column_map.get_offsets_column().assume_mutable();
@@ -970,12 +1010,18 @@ Status MapFileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, bool*
     auto val_ptr = column_map.get_values().assume_mutable();
 
     if (num_items > 0) {
-        size_t num_read = num_items;
-        bool key_has_null = false;
-        bool val_has_null = false;
-        RETURN_IF_ERROR(_key_iterator->next_batch(&num_read, key_ptr, 
&key_has_null));
-        RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, val_ptr, 
&val_has_null));
-        DCHECK(num_read == num_items);
+        if (read_offset_only()) {
+            // OFFSET_ONLY mode: skip reading actual key/value data, fill with 
defaults
+            key_ptr->insert_many_defaults(num_items);
+            val_ptr->insert_many_defaults(num_items);
+        } else {
+            size_t num_read = num_items;
+            bool key_has_null = false;
+            bool val_has_null = false;
+            RETURN_IF_ERROR(_key_iterator->next_batch(&num_read, key_ptr, 
&key_has_null));
+            RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, val_ptr, 
&val_has_null));
+            DCHECK(num_read == num_items);
+        }
 
         column_map.get_keys_ptr() = std::move(key_ptr);
         column_map.get_values_ptr() = std::move(val_ptr);
@@ -1008,6 +1054,27 @@ Status MapFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const size_t
         dst->insert_many_defaults(count);
         return Status::OK();
     }
+
+    if (read_null_map_only()) {
+        // NULL_MAP_ONLY mode: read null map by rowids, fill nested ColumnMap 
with empty defaults
+        DORIS_CHECK(dst->is_nullable());
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        if (_null_iterator) {
+            auto null_map_ptr = nullable_col.get_null_map_column_ptr();
+            RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, 
null_map_ptr));
+        } else {
+            // schema-change: column became nullable but old segment has no 
null data
+            auto null_map_ptr = nullable_col.get_null_map_column_ptr();
+            auto& null_map = assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map_ptr);
+            null_map.insert_many_vals(0, count);
+        }
+        // fill nested ColumnMap with empty (zero-element) maps
+        auto& column_map = assert_cast<ColumnMap&, 
TypeCheckOnRelease::DISABLE>(
+                nullable_col.get_nested_column());
+        column_map.insert_many_defaults(count);
+        return Status::OK();
+    }
+
     if (count == 0) {
         return Status::OK();
     }
@@ -1195,21 +1262,47 @@ Status MapFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_acc
         return Status::OK();
     }
 
+    // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY)
+    _check_and_set_meta_read_mode(sub_all_access_paths);
+    if (read_offset_only()) {
+        _key_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        _val_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        DLOG(INFO) << "Map column iterator set column " << _column_name
+                   << " to OFFSET_ONLY reading mode, key/value columns set to 
SKIP_READING";
+        return Status::OK();
+    }
+    if (read_null_map_only()) {
+        _key_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        _val_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        DLOG(INFO) << "Map column iterator set column " << _column_name
+                   << " to NULL_MAP_ONLY reading mode, key/value columns set 
to SKIP_READING";
+        return Status::OK();
+    }
+
     TColumnAccessPaths key_all_access_paths;
     TColumnAccessPaths val_all_access_paths;
     TColumnAccessPaths key_predicate_access_paths;
     TColumnAccessPaths val_predicate_access_paths;
 
     for (auto paths : sub_all_access_paths) {
-        if (paths.data_access_path.path[0] == "*") {
-            paths.data_access_path.path[0] = _key_iterator->column_name();
-            key_all_access_paths.emplace_back(paths);
+        if (paths.data_access_path.path[0] == ACCESS_ALL) {
+            // ACCESS_ALL means element_at(map, key) style access: the key 
column must be
+            // fully read so that the runtime can match the requested key, 
while any sub-path
+            // qualifiers (e.g. OFFSET) apply only to the value column.
+            // For key: create a path with just the column name (= full data 
access).
+            TColumnAccessPath key_path;
+            key_path.__set_type(paths.type);
+            TDataAccessPath key_data_path;
+            key_data_path.__set_path({_key_iterator->column_name()});
+            key_path.__set_data_access_path(key_data_path);
+            key_all_access_paths.emplace_back(std::move(key_path));
+            // For value: pass the full sub-path so qualifiers like OFFSET 
propagate.
             paths.data_access_path.path[0] = _val_iterator->column_name();
             val_all_access_paths.emplace_back(paths);
-        } else if (paths.data_access_path.path[0] == "KEYS") {
+        } else if (paths.data_access_path.path[0] == ACCESS_MAP_KEYS) {
             paths.data_access_path.path[0] = _key_iterator->column_name();
             key_all_access_paths.emplace_back(paths);
-        } else if (paths.data_access_path.path[0] == "VALUES") {
+        } else if (paths.data_access_path.path[0] == ACCESS_MAP_VALUES) {
             paths.data_access_path.path[0] = _val_iterator->column_name();
             val_all_access_paths.emplace_back(paths);
         }
@@ -1218,15 +1311,20 @@ Status MapFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_acc
     const auto need_read_values = !val_all_access_paths.empty();
 
     for (auto paths : sub_predicate_access_paths) {
-        if (paths.data_access_path.path[0] == "*") {
-            paths.data_access_path.path[0] = _key_iterator->column_name();
-            key_predicate_access_paths.emplace_back(paths);
+        if (paths.data_access_path.path[0] == ACCESS_ALL) {
+            // Same logic as above: key needs full data, value gets the 
sub-path.
+            TColumnAccessPath key_path;
+            key_path.__set_type(paths.type);
+            TDataAccessPath key_data_path;
+            key_data_path.__set_path({_key_iterator->column_name()});
+            key_path.__set_data_access_path(key_data_path);
+            key_predicate_access_paths.emplace_back(std::move(key_path));
             paths.data_access_path.path[0] = _val_iterator->column_name();
             val_predicate_access_paths.emplace_back(paths);
-        } else if (paths.data_access_path.path[0] == "KEYS") {
+        } else if (paths.data_access_path.path[0] == ACCESS_MAP_KEYS) {
             paths.data_access_path.path[0] = _key_iterator->column_name();
             key_predicate_access_paths.emplace_back(paths);
-        } else if (paths.data_access_path.path[0] == "VALUES") {
+        } else if (paths.data_access_path.path[0] == ACCESS_MAP_VALUES) {
             paths.data_access_path.path[0] = _val_iterator->column_name();
             val_predicate_access_paths.emplace_back(paths);
         }
@@ -1285,6 +1383,30 @@ Status StructFileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, bo
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // NULL_MAP_ONLY mode: read null map, fill nested ColumnStruct with 
empty defaults
+        DORIS_CHECK(dst->is_nullable());
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        auto null_map_ptr = nullable_col.get_null_map_column_ptr();
+        size_t num_read = *n;
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            // schema-change: column became nullable but old segment has no 
null data
+            auto& null_map = assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
+        DCHECK(num_read == *n);
+        // fill nested ColumnStruct with defaults to maintain consistent 
column sizes
+        auto& column_struct = assert_cast<ColumnStruct&, 
TypeCheckOnRelease::DISABLE>(
+                nullable_col.get_nested_column());
+        column_struct.insert_many_defaults(num_read);
+        *has_null = true;
+        return Status::OK();
+    }
+
     auto& column_struct = assert_cast<ColumnStruct&, 
TypeCheckOnRelease::DISABLE>(
             dst->is_nullable() ? 
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
     for (size_t i = 0; i < column_struct.tuple_size(); i++) {
@@ -1324,6 +1446,14 @@ Status 
StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) {
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // In NULL_MAP_ONLY mode, only seek the null iterator; skip all 
sub-column iterators
+        if (_struct_reader->is_nullable() && _null_iterator) {
+            RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
+        }
+        return Status::OK();
+    }
+
     for (auto& column_iterator : _sub_column_iterators) {
         RETURN_IF_ERROR(column_iterator->seek_to_ordinal(ord));
     }
@@ -1377,7 +1507,7 @@ Status StructFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const siz
         }
         RETURN_IF_ERROR(seek_to_ordinal(start_idx));
         size_t num_read = this_run;
-        RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr));
+        RETURN_IF_ERROR(next_batch(&num_read, dst));
         DCHECK_EQ(num_read, this_run);
 
         start_idx = rowids[i];
@@ -1387,7 +1517,7 @@ Status StructFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const siz
 
     RETURN_IF_ERROR(seek_to_ordinal(start_idx));
     size_t num_read = this_run;
-    RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr));
+    RETURN_IF_ERROR(next_batch(&num_read, dst));
     DCHECK_EQ(num_read, this_run);
     return Status::OK();
 }
@@ -1428,6 +1558,17 @@ Status StructFileColumnIterator::set_access_paths(
     auto sub_all_access_paths = 
DORIS_TRY(_get_sub_access_paths(all_access_paths));
     auto sub_predicate_access_paths = 
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
 
+    // Check for NULL_MAP_ONLY mode: only read null map, skip all sub-columns
+    _check_and_set_meta_read_mode(sub_all_access_paths);
+    if (read_null_map_only()) {
+        for (auto& sub_iterator : _sub_column_iterators) {
+            sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        }
+        DLOG(INFO) << "Struct column iterator set column " << _column_name
+                   << " to NULL_MAP_ONLY reading mode, all sub-columns set to 
SKIP_READING";
+        return Status::OK();
+    }
+
     const auto no_sub_column_to_skip = sub_all_access_paths.empty();
     const auto no_predicate_sub_column = sub_predicate_access_paths.empty();
 
@@ -1567,6 +1708,10 @@ Status ArrayFileColumnIterator::init(const 
ColumnIteratorOptions& opts) {
 }
 
 Status ArrayFileColumnIterator::_seek_by_offsets(ordinal_t ord) {
+    if (read_offset_only()) {
+        // In OFFSET_ONLY mode, item iterator is SKIP_READING, no need to seek 
it
+        return Status::OK();
+    }
     // using offsets info
     ordinal_t offset = 0;
     RETURN_IF_ERROR(_offset_iterator->_peek_one_offset(&offset));
@@ -1580,6 +1725,14 @@ Status 
ArrayFileColumnIterator::seek_to_ordinal(ordinal_t ord) {
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // In NULL_MAP_ONLY mode, only seek the null iterator; skip offset and 
item iterators
+        if (_array_reader->is_nullable() && _null_iterator) {
+            RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
+        }
+        return Status::OK();
+    }
+
     RETURN_IF_ERROR(_offset_iterator->seek_to_ordinal(ord));
     if (_array_reader->is_nullable()) {
         RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
@@ -1594,6 +1747,30 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, boo
         return Status::OK();
     }
 
+    if (read_null_map_only()) {
+        // NULL_MAP_ONLY mode: read null map, fill nested ColumnArray with 
empty defaults
+        DORIS_CHECK(dst->is_nullable());
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        auto null_map_ptr = nullable_col.get_null_map_column_ptr();
+        size_t num_read = *n;
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            // schema-change: column became nullable but old segment has no 
null data
+            auto& null_map = assert_cast<ColumnUInt8&, 
TypeCheckOnRelease::DISABLE>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
+        DCHECK(num_read == *n);
+        // fill nested ColumnArray with empty (zero-length) arrays
+        auto& column_array = assert_cast<ColumnArray&, 
TypeCheckOnRelease::DISABLE>(
+                nullable_col.get_nested_column());
+        column_array.insert_many_defaults(num_read);
+        *has_null = true;
+        return Status::OK();
+    }
+
     const auto* column_array = check_and_get_column<ColumnArray>(
             dst->is_nullable() ? 
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
 
@@ -1610,10 +1787,16 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, boo
             column_offsets.get_data().back() - column_offsets.get_data()[start 
- 1]; // -1 is valid
     auto column_items_ptr = column_array->get_data().assume_mutable();
     if (num_items > 0) {
-        size_t num_read = num_items;
-        bool items_has_null = false;
-        RETURN_IF_ERROR(_item_iterator->next_batch(&num_read, 
column_items_ptr, &items_has_null));
-        DCHECK(num_read == num_items);
+        if (read_offset_only()) {
+            // OFFSET_ONLY mode: skip reading actual item data, fill with 
defaults
+            column_items_ptr->insert_many_defaults(num_items);
+        } else {
+            size_t num_read = num_items;
+            bool items_has_null = false;
+            RETURN_IF_ERROR(
+                    _item_iterator->next_batch(&num_read, column_items_ptr, 
&items_has_null));
+            DCHECK(num_read == num_items);
+        }
     }
 
     if (dst->is_nullable()) {
@@ -1667,11 +1850,10 @@ Status ArrayFileColumnIterator::read_by_rowids(const 
rowid_t* rowids, const size
     }
 
     for (size_t i = 0; i < count; ++i) {
-        // TODO(cambyzju): now read array one by one, need optimize later
+        // TODO(cambyszju): now read array one by one, need optimize later
         RETURN_IF_ERROR(seek_to_ordinal(rowids[i]));
         size_t num_read = 1;
-        RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr));
-        DCHECK(num_read == 1);
+        RETURN_IF_ERROR(next_batch(&num_read, dst));
     }
     return Status::OK();
 }
@@ -1700,12 +1882,27 @@ Status ArrayFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_a
     auto sub_all_access_paths = 
DORIS_TRY(_get_sub_access_paths(all_access_paths));
     auto sub_predicate_access_paths = 
DORIS_TRY(_get_sub_access_paths(predicate_access_paths));
 
+    // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY)
+    _check_and_set_meta_read_mode(sub_all_access_paths);
+    if (read_offset_only()) {
+        _item_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        DLOG(INFO) << "Array column iterator set column " << _column_name
+                   << " to OFFSET_ONLY reading mode, item column set to 
SKIP_READING";
+        return Status::OK();
+    }
+    if (read_null_map_only()) {
+        _item_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
+        DLOG(INFO) << "Array column iterator set column " << _column_name
+                   << " to NULL_MAP_ONLY reading mode, item column set to 
SKIP_READING";
+        return Status::OK();
+    }
+
     const auto no_sub_column_to_skip = sub_all_access_paths.empty();
     const auto no_predicate_sub_column = sub_predicate_access_paths.empty();
 
     if (!no_sub_column_to_skip) {
         for (auto& path : sub_all_access_paths) {
-            if (path.data_access_path.path[0] == "*") {
+            if (path.data_access_path.path[0] == ACCESS_ALL) {
                 path.data_access_path.path[0] = _item_iterator->column_name();
             }
         }
@@ -1713,7 +1910,7 @@ Status ArrayFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_a
 
     if (!no_predicate_sub_column) {
         for (auto& path : sub_predicate_access_paths) {
-            if (path.data_access_path.path[0] == "*") {
+            if (path.data_access_path.path[0] == ACCESS_ALL) {
                 path.data_access_path.path[0] = _item_iterator->column_name();
             }
         }
@@ -1727,10 +1924,68 @@ Status ArrayFileColumnIterator::set_access_paths(const 
TColumnAccessPaths& all_a
     return Status::OK();
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// StringFileColumnIterator implementation
+////////////////////////////////////////////////////////////////////////////////
+
+StringFileColumnIterator::StringFileColumnIterator(std::shared_ptr<ColumnReader> reader)
+        : FileColumnIterator(std::move(reader)) {} // the reader is handed straight to the base class; nothing else is initialized here
+
+Status StringFileColumnIterator::init(const ColumnIteratorOptions& opts) {
+    // Outside OFFSET_ONLY mode the caller's options are forwarded untouched.
+    if (!read_offset_only()) {
+        return FileColumnIterator::init(opts);
+    }
+    // OFFSET_ONLY: initialize the base iterator with a copy of the options that
+    // has only_read_offsets switched on; the caller's object is not modified.
+    // NOTE(review): the mode is recorded by set_access_paths(), so this presumably
+    // relies on set_access_paths() having run before init() — confirm call order.
+    auto offset_only_opts = opts;
+    offset_only_opts.only_read_offsets = true;
+    return FileColumnIterator::init(offset_only_opts);
+}
+
+Status StringFileColumnIterator::set_access_paths(
+        const TColumnAccessPaths& all_access_paths,
+        const TColumnAccessPaths& predicate_access_paths) {
+    // No access paths at all: reading flag and read mode are left as they are.
+    if (all_access_paths.empty()) {
+        return Status::OK();
+    }
+
+    // Any predicate path switches the column to READING_FOR_PREDICATE.
+    const bool has_predicate_paths = !predicate_access_paths.empty();
+    if (has_predicate_paths) {
+        set_reading_flag(ReadingFlag::READING_FOR_PREDICATE);
+    }
+
+    // Incoming paths are rooted at the column, e.g. ["col_name", "OFFSET"] or
+    // ["col_name", "NULL"]. Drop that leading name first so that path[0] is the
+    // segment _check_and_set_meta_read_mode() compares against the meta markers.
+    const auto stripped_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths));
+    _check_and_set_meta_read_mode(stripped_paths);
+
+    // Debug-log the mode only when a meta-only mode was actually selected.
+    const bool offset_only = read_offset_only();
+    if (offset_only || read_null_map_only()) {
+        DLOG(INFO) << "String column iterator set column " << _column_name
+                   << (offset_only ? " to OFFSET_ONLY reading mode"
+                                   : " to NULL_MAP_ONLY reading mode");
+    }
+
+    return Status::OK();
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 
 FileColumnIterator::FileColumnIterator(std::shared_ptr<ColumnReader> reader) : 
_reader(reader) {}
 
+void ColumnIterator::_check_and_set_meta_read_mode(const TColumnAccessPaths& sub_all_access_paths) {
+    // Scan the sub paths in order; the first one whose leading segment matches
+    // ACCESS_OFFSET or ACCESS_NULL (compared with StringCaseEqual) decides the
+    // mode. If no path matches, the mode is DEFAULT.
+    // NOTE(review): a meta marker wins even when another path in the list asks
+    // for regular data — presumably the planner never mixes the two; confirm.
+    _read_mode = ReadMode::DEFAULT;
+    for (const auto& access_path : sub_all_access_paths) {
+        const auto& segments = access_path.data_access_path.path;
+        if (segments.empty()) {
+            continue; // nothing to inspect on an empty path
+        }
+        const auto& head = segments[0];
+        if (StringCaseEqual()(head, ACCESS_OFFSET)) {
+            _read_mode = ReadMode::OFFSET_ONLY;
+            return;
+        }
+        if (StringCaseEqual()(head, ACCESS_NULL)) {
+            _read_mode = ReadMode::NULL_MAP_ONLY;
+            return;
+        }
+    }
+}
+
 Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "File column iterator column " << _column_name << " skip 
reading.";
@@ -1838,6 +2093,54 @@ Status 
FileColumnIterator::next_batch_of_zone_map(size_t* n, MutableColumnPtr& d
 }
 
 Status FileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* 
has_null) {
+    if (read_null_map_only()) { // NULL_MAP_ONLY: append up to *n null flags taken from the pages' null runs; the nested column only gets defaults
+        DLOG(INFO) << "File column iterator column " << _column_name
+                   << " in NULL_MAP_ONLY mode, reading only null map.";
+        DORIS_CHECK(dst->is_nullable()); // the branch writes into a ColumnNullable null map, so dst must be nullable
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        auto& null_map_data = nullable_col.get_null_map_data();
+
+        size_t remaining = *n; // rows still to produce
+        *has_null = false; // NOTE(review): dereferenced unconditionally — assumes callers never pass nullptr; confirm
+        while (remaining > 0) {
+            if (!_page.has_remaining()) { // current page exhausted: move on, or stop at end of stream
+                bool eos = false;
+                RETURN_IF_ERROR(_load_next_page(&eos));
+                if (eos) {
+                    break;
+                }
+            }
+
+            size_t nrows_in_page = std::min(remaining, _page.remaining()); // rows this page contributes to the batch
+            size_t nrows_to_read = nrows_in_page; // counts down while null runs are consumed
+            if (_page.has_null) {
+                while (nrows_to_read > 0) {
+                    bool is_null = false;
+                    size_t this_run = _page.null_decoder.GetNextRun(&is_null, nrows_to_read); // run of equal null flags, capped at nrows_to_read
+                    const size_t cur_size = null_map_data.size();
+                    null_map_data.resize(cur_size + this_run);
+                    memset(null_map_data.data() + cur_size, is_null ? 1 : 0, this_run); // one byte per row: 1 = null, 0 = not null
+                    if (is_null) {
+                        *has_null = true;
+                    }
+                    nrows_to_read -= this_run;
+                    _page.offset_in_page += this_run; // keep page cursor and iterator ordinal in step with the rows consumed
+                    _current_ordinal += this_run;
+                }
+            } else {
+                const size_t cur_size = null_map_data.size();
+                null_map_data.resize(cur_size + nrows_to_read);
+                memset(null_map_data.data() + cur_size, 0, nrows_to_read); // page has no nulls: every row is flagged not-null
+                _page.offset_in_page += nrows_to_read;
+                _current_ordinal += nrows_to_read;
+            }
+            remaining -= nrows_in_page;
+        }
+        *n -= remaining; // report the number of rows actually produced (smaller than requested if end of stream was hit)
+        nullable_col.get_nested_column().insert_many_defaults(*n); // pad the nested column so it stays as long as the null map
+        return Status::OK();
+    }
+
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "File column iterator column " << _column_name << " skip 
reading.";
         dst->insert_many_defaults(*n);
@@ -1903,6 +2206,71 @@ Status FileColumnIterator::next_batch(size_t* n, 
MutableColumnPtr& dst, bool* ha
 
 Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t 
count,
                                           MutableColumnPtr& dst) {
+    if (read_null_map_only()) {
+        // NULL_MAP_ONLY: only the null flags of the requested rowids are materialized.
+        // The nested column is padded with defaults so both halves of the nullable
+        // column grow by the same number of rows. The rowid arithmetic below assumes
+        // `rowids` is sorted ascending.
+        DLOG(INFO) << "File column iterator column " << _column_name
+                   << " in NULL_MAP_ONLY mode, reading only null map by rowids.";
+
+        DORIS_CHECK(dst->is_nullable());
+        auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+        auto& null_map_data = nullable_col.get_null_map_data();
+        const size_t base_size = null_map_data.size();
+
+        // Grow the null map and the nested column by `count` together, exactly once,
+        // so they have equal length on every return path (including early error
+        // returns from seek_to_ordinal). The null map slots are filled in below.
+        null_map_data.resize(base_size + count);
+        nullable_col.get_nested_column().insert_many_defaults(count);
+
+        size_t remaining = count;
+        size_t total_read_count = 0;
+        size_t nrows_to_read = 0;
+        while (remaining > 0) {
+            // Position on the page that holds the next rowid not yet handled.
+            RETURN_IF_ERROR(seek_to_ordinal(rowids[total_read_count]));
+
+            // Upper bound on how many requested rows this page can serve.
+            nrows_to_read = std::min(remaining, _page.remaining());
+
+            if (_page.has_null) {
+                size_t already_read = 0;
+                while ((nrows_to_read - already_read) > 0) {
+                    bool is_null = false;
+                    size_t this_run = std::min(nrows_to_read - already_read, _page.remaining());
+                    if (UNLIKELY(this_run == 0)) {
+                        // Page exhausted before the budget was used; the outer loop re-seeks.
+                        break;
+                    }
+                    this_run = _page.null_decoder.GetNextRun(&is_null, this_run);
+
+                    // Count the requested rowids that fall inside the run just decoded,
+                    // i.e. in [current ordinal, current ordinal + this_run).
+                    size_t offset = total_read_count + already_read;
+                    size_t this_read_count = 0;
+                    rowid_t current_ordinal_in_page =
+                            cast_set<uint32_t>(_page.offset_in_page + _page.first_ordinal);
+                    for (size_t i = 0; i < this_run; ++i) {
+                        if (rowids[offset + i] - current_ordinal_in_page >= this_run) {
+                            break;
+                        }
+                        this_read_count++;
+                    }
+
+                    if (this_read_count > 0) {
+                        memset(null_map_data.data() + base_size + offset, is_null ? 1 : 0,
+                               this_read_count);
+                    }
+
+                    already_read += this_read_count;
+                    _page.offset_in_page += this_run;
+                }
+
+                nrows_to_read = already_read;
+                total_read_count += nrows_to_read;
+                remaining -= nrows_to_read;
+            } else {
+                // The page holds no nulls, but only the rowids that actually lie inside
+                // this page may be flagged not-null here: later rowids can belong to
+                // other pages that do contain nulls, so they are left for the next seek.
+                const auto page_end_ordinal =
+                        _page.first_ordinal + _page.offset_in_page + _page.remaining();
+                size_t rows_in_page = 0;
+                while (rows_in_page < nrows_to_read &&
+                       rowids[total_read_count + rows_in_page] < page_end_ordinal) {
+                    ++rows_in_page;
+                }
+                nrows_to_read = rows_in_page;
+
+                memset(null_map_data.data() + base_size + total_read_count, 0, nrows_to_read);
+                total_read_count += nrows_to_read;
+                remaining -= nrows_to_read;
+            }
+        }
+
+        // remaining + total_read_count == count holds throughout the loop, so here
+        // total_read_count == count: the null map and the nested column have both
+        // grown by exactly `count` rows and need no further trimming or padding.
+        return Status::OK();
+    }
+
     if (_reading_flag == ReadingFlag::SKIP_READING) {
         DLOG(INFO) << "File column iterator column " << _column_name << " skip 
reading.";
         dst->insert_many_defaults(count);
@@ -2001,11 +2369,14 @@ Status FileColumnIterator::_read_data_page(const 
OrdinalPageIndexIterator& iter)
     Slice page_body;
     PageFooterPB footer;
     _opts.type = DATA_PAGE;
+    PageDecoderOptions decoder_opts;
+    decoder_opts.only_read_offsets = _opts.only_read_offsets;
     RETURN_IF_ERROR(
             _reader->read_page(_opts, iter.page(), &handle, &page_body, 
&footer, _compress_codec));
     // parse data page
     auto st = ParsedPage::create(std::move(handle), page_body, 
footer.data_page_footer(),
-                                 _reader->encoding_info(), iter.page(), 
iter.page_index(), &_page);
+                                 _reader->encoding_info(), iter.page(), 
iter.page_index(), &_page,
+                                 decoder_opts);
     if (!st.ok()) {
         LOG(WARNING) << "failed to create ParsedPage, file=" << 
_opts.file_reader->path().native()
                      << ", page_offset=" << iter.page().offset << ", 
page_size=" << iter.page().size
diff --git a/be/src/storage/segment/column_reader.h 
b/be/src/storage/segment/column_reader.h
index 57a4bc4d74a..7089f6566e1 100644
--- a/be/src/storage/segment/column_reader.h
+++ b/be/src/storage/segment/column_reader.h
@@ -104,6 +104,7 @@ struct ColumnIteratorOptions {
     // reader statistics
     OlapReaderStatistics* stats = nullptr; // Ref
     io::IOContext io_ctx;
+    bool only_read_offsets = false;
 
     void sanity_check() const {
         CHECK_NOTNULL(file_reader);
@@ -408,17 +409,38 @@ public:
             std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>& 
prefetchers,
             PrefetcherInitMethod init_method) {}
 
+    static constexpr const char* ACCESS_OFFSET = "OFFSET";
+    static constexpr const char* ACCESS_ALL = "*";
+    static constexpr const char* ACCESS_MAP_KEYS = "KEYS";
+    static constexpr const char* ACCESS_MAP_VALUES = "VALUES";
+    static constexpr const char* ACCESS_NULL = "NULL";
+
+    // Meta-only read modes:
+    // - OFFSET_ONLY: only read offset information (e.g., for 
array_size/map_size/string_length)
+    // - NULL_MAP_ONLY: only read null map (e.g., for IS NULL / IS NOT NULL 
predicates)
+    // When these modes are enabled, actual content data is skipped.
+    enum class ReadMode : int { DEFAULT, OFFSET_ONLY, NULL_MAP_ONLY };
+
+    bool read_offset_only() const { return _read_mode == 
ReadMode::OFFSET_ONLY; }
+    bool read_null_map_only() const { return _read_mode == 
ReadMode::NULL_MAP_ONLY; }
+
 protected:
+    // Checks sub access paths for OFFSET or NULL meta-only modes and
+    // updates _read_mode accordingly. Use the accessor helpers
+    // read_offset_only() / read_null_map_only() to query the current mode.
+    void _check_and_set_meta_read_mode(const TColumnAccessPaths& 
sub_all_access_paths);
+
     Result<TColumnAccessPaths> _get_sub_access_paths(const TColumnAccessPaths& 
access_paths);
     ColumnIteratorOptions _opts;
 
     ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING};
+    ReadMode _read_mode = ReadMode::DEFAULT;
     std::string _column_name;
 };
 
 // This iterator is used to read column data from file
 // for scalar type
-class FileColumnIterator final : public ColumnIterator {
+class FileColumnIterator : public ColumnIterator {
 public:
     explicit FileColumnIterator(std::shared_ptr<ColumnReader> reader);
     ~FileColumnIterator() override;
@@ -472,7 +494,6 @@ private:
 
     std::shared_ptr<ColumnReader> _reader = nullptr;
 
-    // iterator owned compress codec, should NOT be shared by threads, 
initialized in init()
     BlockCompressionCodec* _compress_codec = nullptr;
 
     // 1. The _page represents current page.
@@ -508,6 +529,21 @@ public:
     ordinal_t get_current_ordinal() const override { return 0; }
 };
 
+// StringFileColumnIterator adds meta-only reading to FileColumnIterator for
+// string/binary columns. set_access_paths() records the read mode; when it is
+// OFFSET_ONLY, init() sets only_read_offsets on a copy of the options so that
+// the BinaryPlainPageDecoder skips chars memcpy and only fills offsets.
+class StringFileColumnIterator final : public FileColumnIterator {
+public:
+    explicit StringFileColumnIterator(std::shared_ptr<ColumnReader> reader);
+    ~StringFileColumnIterator() override = default;
+
+    Status init(const ColumnIteratorOptions& opts) override; // forwards to FileColumnIterator::init, enabling only_read_offsets in OFFSET_ONLY mode
+
+    Status set_access_paths(const TColumnAccessPaths& all_access_paths,
+                            const TColumnAccessPaths& predicate_access_paths) override; // selects DEFAULT / OFFSET_ONLY / NULL_MAP_ONLY from the access paths
+};
+
 // This iterator make offset operation write once for
 class OffsetFileColumnIterator final : public ColumnIterator {
 public:
@@ -520,6 +556,12 @@ public:
     Status init(const ColumnIteratorOptions& opts) override;
 
     Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) 
override;
+
+    // Convenience overload for callers that do not need the has_null result.
+    Status next_batch(size_t* n, MutableColumnPtr& dst) {
+        // Initialized so the flag is never indeterminate, whichever path the
+        // three-argument overload takes.
+        bool has_null = false;
+        return next_batch(n, dst, &has_null);
+    }
+
     ordinal_t get_current_ordinal() const override {
         return _offset_iterator->get_current_ordinal();
     }
@@ -569,6 +611,9 @@ public:
     Status seek_to_ordinal(ordinal_t ord) override;
 
     ordinal_t get_current_ordinal() const override {
+        if (read_null_map_only() && _null_iterator) { // NULL_MAP_ONLY with a null iterator: report its position (presumably the only sub-iterator advanced in this mode — confirm against next_batch)
+            return _null_iterator->get_current_ordinal();
+        }
         return _offsets_iterator->get_current_ordinal(); // otherwise the offsets iterator's position is reported
     }
     Status init_prefetcher(const SegmentPrefetchParams& params) override;
@@ -603,12 +648,20 @@ public:
 
     Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) 
override;
 
+    // Convenience overload for callers that do not need the has_null result.
+    Status next_batch(size_t* n, MutableColumnPtr& dst) {
+        // Initialized so the flag is never indeterminate, whichever path the
+        // three-argument overload takes.
+        bool has_null = false;
+        return next_batch(n, dst, &has_null);
+    }
+
     Status read_by_rowids(const rowid_t* rowids, const size_t count,
                           MutableColumnPtr& dst) override;
 
     Status seek_to_ordinal(ordinal_t ord) override;
 
     ordinal_t get_current_ordinal() const override {
+        if (read_null_map_only() && _null_iterator) { // NULL_MAP_ONLY with a null iterator: report its position (presumably the only sub-iterator advanced in this mode — confirm against next_batch)
+            return _null_iterator->get_current_ordinal();
+        }
         return _sub_column_iterators[0]->get_current_ordinal(); // otherwise the first sub-column iterator's position is reported
     }
 
@@ -643,12 +696,20 @@ public:
 
     Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) 
override;
 
+    // Convenience overload for callers that do not need the has_null result.
+    Status next_batch(size_t* n, MutableColumnPtr& dst) {
+        // Initialized so the flag is never indeterminate, whichever path the
+        // three-argument overload takes.
+        bool has_null = false;
+        return next_batch(n, dst, &has_null);
+    }
+
     Status read_by_rowids(const rowid_t* rowids, const size_t count,
                           MutableColumnPtr& dst) override;
 
     Status seek_to_ordinal(ordinal_t ord) override;
 
     ordinal_t get_current_ordinal() const override {
+        if (read_null_map_only() && _null_iterator) { // NULL_MAP_ONLY with a null iterator: report its position (presumably the only sub-iterator advanced in this mode — confirm against next_batch)
+            return _null_iterator->get_current_ordinal();
+        }
         return _offset_iterator->get_current_ordinal(); // otherwise the offset iterator's position is reported
     }
 
diff --git a/be/src/storage/segment/options.h b/be/src/storage/segment/options.h
index 72509cb4602..4ff94c0a8e3 100644
--- a/be/src/storage/segment/options.h
+++ b/be/src/storage/segment/options.h
@@ -49,6 +49,7 @@ struct PageBuilderOptions {
 
 struct PageDecoderOptions {
     bool need_check_bitmap = true;
+    bool only_read_offsets = false; // copied from ColumnIteratorOptions::only_read_offsets by FileColumnIterator::_read_data_page before the page is parsed
 };
 
 } // namespace segment_v2
diff --git a/be/test/storage/segment/column_reader_test.cpp 
b/be/test/storage/segment/column_reader_test.cpp
index 92385130c9d..54bf94a2061 100644
--- a/be/test/storage/segment/column_reader_test.cpp
+++ b/be/test/storage/segment/column_reader_test.cpp
@@ -301,4 +301,42 @@ TEST_F(ColumnReaderTest, 
MapReadByRowidsSkipReadingResizesDestination) {
     ASSERT_TRUE(st.ok()) << "read_by_rowids failed: " << st.to_string();
     ASSERT_EQ(count, dst->size());
 }
-} // namespace doris::segment_v2
\ No newline at end of file
+TEST_F(ColumnReaderTest, MapAccessAllWithOffsetDoesNotPropagateOffsetToKey) {
+    // Regression test: when the access path is [map_col, *, OFFSET]
+    // (e.g. length(map_col['some_key'])), the key column must be fully read
+    // so that element_at() can match the key. Only the value column should
+    // enter OFFSET_ONLY mode.
+    auto map_reader = std::make_shared<ColumnReader>(); // default-constructed readers: only set_access_paths() is exercised, no page is read
+    auto null_iter = std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>());
+    auto offsets_iter = std::make_unique<OffsetFileColumnIterator>(
+            std::make_unique<FileColumnIterator>(std::make_shared<ColumnReader>()));
+    auto key_iter = std::make_unique<StringFileColumnIterator>(std::make_shared<ColumnReader>());
+    key_iter->set_column_name("key");
+    auto val_iter = std::make_unique<StringFileColumnIterator>(std::make_shared<ColumnReader>());
+    val_iter->set_column_name("value");
+
+    MapFileColumnIterator map_iter(map_reader, std::move(null_iter), std::move(offsets_iter),
+                                   std::move(key_iter), std::move(val_iter));
+    map_iter.set_column_name("map_col");
+
+    // path: [map_col, *, OFFSET]  — simulates length(map_col['c_phone'])
+    TColumnAccessPaths all_access_paths;
+    all_access_paths.emplace_back();
+    all_access_paths[0].data_access_path.path = {"map_col", "*", "OFFSET"};
+    TColumnAccessPaths predicate_access_paths; // left empty: the scenario has no predicate paths
+
+    auto st = map_iter.set_access_paths(all_access_paths, predicate_access_paths);
+    ASSERT_TRUE(st.ok()) << "set_access_paths failed: " << st.to_string();
+
+    // Key must be fully readable (NEED_TO_READ), NOT in OFFSET_ONLY mode.
+    auto* key_ptr = static_cast<StringFileColumnIterator*>(map_iter._key_iterator.get()); // NOTE(review): reads non-public iterator members; presumably the test build exposes them — confirm
+    ASSERT_EQ(key_ptr->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ);
+    ASSERT_FALSE(key_ptr->read_offset_only());
+
+    // Value should be in OFFSET_ONLY mode since we only need string lengths.
+    auto* val_ptr = static_cast<StringFileColumnIterator*>(map_iter._val_iterator.get());
+    ASSERT_EQ(val_ptr->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ);
+    ASSERT_TRUE(val_ptr->read_offset_only());
+}
+
+} // namespace doris::segment_v2


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to