This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 49f280996da [Refactor](variant) Promote enable_doc_mode from 
column-level to type-level  (#61895) (#61950)
49f280996da is described below

commit 49f280996da1f58989f4cb49f62d4c39a55ede16
Author: Chenyang Sun <[email protected]>
AuthorDate: Wed Apr 1 13:40:18 2026 +0800

    [Refactor](variant) Promote enable_doc_mode from column-level to type-level 
 (#61895) (#61950)
    
    Cherry-pick from master #61895
    
    ---------
    
    Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
 be/src/core/column/column_variant.cpp              | 99 ++++++++++++++++------
 be/src/core/column/column_variant.h                | 16 ++--
 be/src/core/data_type/data_type_factory.cpp        | 26 +++---
 be/src/core/data_type/data_type_variant.cpp        | 19 ++---
 be/src/core/data_type/data_type_variant.h          |  6 +-
 be/src/exec/common/variant_util.cpp                | 13 ++-
 be/src/exec/scan/olap_scanner.cpp                  |  5 +-
 .../exprs/function/array/function_array_utils.cpp  |  2 +-
 be/src/exprs/function/array/function_array_utils.h |  2 +
 be/src/exprs/function/cast/cast_to_variant.h       |  7 +-
 be/src/exprs/function/function_variant_element.cpp | 53 ++++++------
 be/src/exprs/table_function/vexplode.cpp           |  1 +
 be/src/exprs/table_function/vexplode_v2.cpp        |  1 +
 be/src/storage/segment/segment.cpp                 |  7 +-
 be/src/storage/segment/segment_writer.cpp          |  1 +
 .../segment/variant/hierarchical_data_iterator.cpp | 66 +++++++++++++--
 .../segment/variant/hierarchical_data_iterator.h   |  8 +-
 .../segment/variant/variant_column_reader.cpp      | 18 ++--
 .../segment/variant/variant_column_writer_impl.cpp |  7 +-
 .../variant_doc_snpashot_compact_iterator.h        |  3 +-
 .../variant_streaming_compaction_writer.cpp        |  2 +-
 be/src/storage/segment/vertical_segment_writer.cpp |  1 +
 be/src/storage/tablet/tablet_meta.cpp              |  4 +-
 be/src/storage/tablet/tablet_schema.cpp            |  4 +-
 be/src/storage/tablet/tablet_schema.h              |  3 +-
 be/test/core/column/column_variant_test.cpp        | 50 +++++------
 be/test/exec/common/schema_util_rowset_test.cpp    |  2 +-
 be/test/exec/common/schema_util_test.cpp           | 22 ++---
 .../function/cast/function_variant_cast_test.cpp   | 16 ++--
 .../function/function_variant_element_test.cpp     |  2 +-
 .../segment/hierarchical_data_iterator_test.cpp    |  4 +-
 .../storage/segment/nested_group_provider_test.cpp |  2 +-
 .../segment/variant_column_writer_reader_test.cpp  | 43 +++++-----
 be/test/storage/segment/variant_util_test.cpp      | 16 ++--
 be/test/testutil/variant_util.h                    | 10 +--
 .../java/org/apache/doris/catalog/VariantType.java |  2 +
 .../main/java/org/apache/doris/catalog/Column.java |  1 +
 .../expressions/functions/scalar/ElementAt.java    |  7 +-
 .../ExternalFileTableValuedFunction.java           |  4 +-
 gensrc/proto/data.proto                            |  1 +
 gensrc/proto/segment_v2.proto                      |  1 +
 gensrc/proto/types.proto                           |  1 +
 gensrc/thrift/Descriptors.thrift                   |  2 +-
 gensrc/thrift/Types.thrift                         |  2 +
 44 files changed, 359 insertions(+), 203 deletions(-)

diff --git a/be/src/core/column/column_variant.cpp 
b/be/src/core/column/column_variant.cpp
index 9dafa0c5230..5ef2016d243 100644
--- a/be/src/core/column/column_variant.cpp
+++ b/be/src/core/column/column_variant.cpp
@@ -480,7 +480,8 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& 
func) const {
         return finalized_object.apply_for_columns(std::forward<Func>(func));
     }
     auto new_root = func(get_root())->assume_mutable();
-    auto res = ColumnVariant::create(_max_subcolumns_count, get_root_type(), 
std::move(new_root));
+    auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, 
get_root_type(),
+                                     std::move(new_root));
     for (const auto& subcolumn : subcolumns) {
         if (subcolumn->data.is_root) {
             continue;
@@ -678,17 +679,21 @@ 
ColumnVariant::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_, bo
     base_type_id = base_type->get_primitive_type();
 }
 
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count)
-        : is_nullable(true), num_rows(0), 
_max_subcolumns_count(max_subcolumns_count) {
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool 
enable_doc_mode)
+        : is_nullable(true),
+          num_rows(0),
+          _max_subcolumns_count(max_subcolumns_count),
+          _enable_doc_mode(enable_doc_mode) {
     subcolumns.create_root(Subcolumn(0, is_nullable, true /*root*/));
     ENABLE_CHECK_CONSISTENCY(this);
 }
 
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, DataTypePtr 
root_type,
-                             MutableColumnPtr&& root_column)
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool 
enable_doc_mode,
+                             DataTypePtr root_type, MutableColumnPtr&& 
root_column)
         : is_nullable(true),
           num_rows(root_column->size()),
-          _max_subcolumns_count(max_subcolumns_count) {
+          _max_subcolumns_count(max_subcolumns_count),
+          _enable_doc_mode(enable_doc_mode) {
     subcolumns.create_root(
             Subcolumn(std::move(root_column), root_type, is_nullable, true 
/*root*/));
     serialized_sparse_column->resize(num_rows);
@@ -696,11 +701,13 @@ ColumnVariant::ColumnVariant(int32_t 
max_subcolumns_count, DataTypePtr root_type
     ENABLE_CHECK_CONSISTENCY(this);
 }
 
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, Subcolumns&& 
subcolumns_)
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool 
enable_doc_mode,
+                             Subcolumns&& subcolumns_)
         : is_nullable(true),
           subcolumns(std::move(subcolumns_)),
           num_rows(subcolumns.empty() ? 0 : 
(*subcolumns.begin())->data.size()),
-          _max_subcolumns_count(max_subcolumns_count) {
+          _max_subcolumns_count(max_subcolumns_count),
+          _enable_doc_mode(enable_doc_mode) {
     if (max_subcolumns_count && subcolumns_.size() > max_subcolumns_count + 1) 
{
         throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                "unmatched max subcolumns count:, max 
subcolumns count: {}, but "
@@ -711,8 +718,11 @@ ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, 
Subcolumns&& subcolum
     serialized_doc_value_column->resize(num_rows);
 }
 
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, size_t size)
-        : is_nullable(true), num_rows(0), 
_max_subcolumns_count(max_subcolumns_count) {
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool 
enable_doc_mode, size_t size)
+        : is_nullable(true),
+          num_rows(0),
+          _max_subcolumns_count(max_subcolumns_count),
+          _enable_doc_mode(enable_doc_mode) {
     subcolumns.create_root(Subcolumn(0, is_nullable, true /*root*/));
     insert_many_defaults(size);
     ENABLE_CHECK_CONSISTENCY(this);
@@ -737,12 +747,30 @@ void ColumnVariant::check_consistency() const {
                                "unmatched doc snapshot column:, expeted rows: 
{}, but meet: {}",
                                num_rows, serialized_doc_value_column->size());
     }
-    // const auto& offsets = serialized_doc_value_column_offsets();
-    // size_t off = offsets[num_rows - 1];
-    // if (off > 0 && subcolumns.size() != 1) {
-    //     throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
-    //                            "doc snapshot column offsets is not empty, 
but subcolumns size is not 1");
-    // }
+    if (_enable_doc_mode && num_rows > 0) {
+        // doc mode invariants:
+        // - only root subcolumn (size == 1)
+        // - sparse column is empty
+        // - subcolumns and doc_value are mutually exclusive
+        if (subcolumns.size() != 1) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "doc mode: should only have root, but 
subcolumns size is {}",
+                                   subcolumns.size());
+        }
+        const auto& sparse_offs = serialized_sparse_column_offsets();
+        if (sparse_offs[num_rows - 1] > 0) {
+            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                                   "doc mode: should not have sparse data");
+        }
+    } else {
+        const auto& offsets = serialized_doc_value_column_offsets();
+        size_t off = offsets[num_rows - 1];
+        if (off > 0 && subcolumns.size() != 1) {
+            throw doris::Exception(
+                    doris::ErrorCode::INTERNAL_ERROR,
+                    "doc snapshot column offsets is not empty, but subcolumns 
size is not 1");
+        }
+    }
 }
 
 size_t ColumnVariant::size() const {
@@ -752,7 +780,7 @@ size_t ColumnVariant::size() const {
 
 MutableColumnPtr ColumnVariant::clone_resized(size_t new_size) const {
     if (new_size == 0) {
-        return ColumnVariant::create(_max_subcolumns_count);
+        return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
     }
     return apply_for_columns(
             [&](const ColumnPtr column) { return 
column->clone_resized(new_size); });
@@ -793,8 +821,11 @@ void ColumnVariant::for_each_subcolumn(ColumnCallback 
callback) {
 
 void ColumnVariant::insert_from(const IColumn& src, size_t n) {
     const auto* src_v = check_and_get_column<ColumnVariant>(src);
-    // only root, quick insert
-    if (src_v->get_subcolumns().size() == 1 && get_subcolumns().size() == 1) {
+    ENABLE_CHECK_CONSISTENCY(src_v);
+    ENABLE_CHECK_CONSISTENCY(this);
+    // doc mode fast path: both sides root-only, direct copy root + sparse + 
doc_value
+    if (_enable_doc_mode) {
+        DCHECK(src_v->_enable_doc_mode) << "dst is doc mode but src is not";
         FieldWithDataType field;
         src_v->subcolumns.get_root()->data.get(n, field);
         subcolumns.get_mutable_root()->data.insert(field);
@@ -802,8 +833,9 @@ void ColumnVariant::insert_from(const IColumn& src, size_t 
n) {
         
serialized_doc_value_column->insert_from(*src_v->get_doc_value_column(), n);
         num_rows++;
     } else {
-        return try_insert((*src_v)[n]);
+        try_insert((*src_v)[n]);
     }
+    ENABLE_CHECK_CONSISTENCY(this);
 }
 
 void ColumnVariant::try_insert(const Field& field) {
@@ -1126,6 +1158,20 @@ void ColumnVariant::insert_range_from(const IColumn& 
src, size_t start, size_t l
     ENABLE_CHECK_CONSISTENCY(&src_object);
     ENABLE_CHECK_CONSISTENCY(this);
 
+    // doc mode fast path: both sides root-only, direct range copy root + 
sparse + doc_value
+    if (_enable_doc_mode) {
+        DCHECK(src_object._enable_doc_mode) << "dst is doc mode but src is 
not";
+        subcolumns.get_mutable_root()->data.insert_range_from(
+                src_object.subcolumns.get_root()->data, start, length);
+        
serialized_sparse_column->insert_range_from(*src_object.serialized_sparse_column,
 start,
+                                                    length);
+        
serialized_doc_value_column->insert_range_from(*src_object.serialized_doc_value_column,
+                                                       start, length);
+        num_rows += length;
+        ENABLE_CHECK_CONSISTENCY(this);
+        return;
+    }
+
     // First, insert src subcolumns
     // We can reach the limit of subcolumns, and in this case
     // the rest of subcolumns from src will be inserted into sparse column.
@@ -1288,7 +1334,7 @@ MutableColumnPtr ColumnVariant::permute(const 
Permutation& perm, size_t limit) c
     }
 
     if (limit == 0) {
-        return ColumnVariant::create(_max_subcolumns_count);
+        return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
     }
 
     return apply_for_columns([&](const ColumnPtr column) { return 
column->permute(perm, limit); });
@@ -2144,13 +2190,14 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, 
ssize_t count) const {
         return finalized_object.filter(filter, count);
     }
     if (num_rows == 0) {
-        auto res = ColumnVariant::create(_max_subcolumns_count, 
count_bytes_in_filter(filter));
+        auto res = ColumnVariant::create(_max_subcolumns_count, 
_enable_doc_mode,
+                                         count_bytes_in_filter(filter));
         ENABLE_CHECK_CONSISTENCY(res.get());
         return res;
     }
     auto new_root = get_root()->filter(filter, count)->assume_mutable();
-    auto new_column =
-            ColumnVariant::create(_max_subcolumns_count, get_root_type(), 
std::move(new_root));
+    auto new_column = ColumnVariant::create(_max_subcolumns_count, 
_enable_doc_mode,
+                                            get_root_type(), 
std::move(new_root));
     for (const auto& entry : subcolumns) {
         if (entry->data.is_root) {
             continue;
@@ -2254,7 +2301,7 @@ bool NO_SANITIZE_UNDEFINED 
ColumnVariant::is_scalar_variant() const {
 
 const DataTypePtr ColumnVariant::NESTED_TYPE =
         std::make_shared<DataTypeNullable>(std::make_shared<DataTypeArray>(
-                
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(0))));
+                
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(0, 
false))));
 
 const DataTypePtr ColumnVariant::NESTED_TYPE_AS_ARRAY_OF_JSONB = 
std::make_shared<DataTypeArray>(
         std::make_shared<DataTypeNullable>(std::make_shared<DataTypeJsonb>()));
@@ -2634,7 +2681,7 @@ void 
ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null
 }
 
 MutableColumnPtr ColumnVariant::clone() const {
-    auto res = ColumnVariant::create(_max_subcolumns_count);
+    auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
     Subcolumns new_subcolumns;
     for (const auto& subcolumn : subcolumns) {
         auto new_subcolumn = subcolumn->data;
diff --git a/be/src/core/column/column_variant.h 
b/be/src/core/column/column_variant.h
index 2c3c427454e..9b7d84df525 100644
--- a/be/src/core/column/column_variant.h
+++ b/be/src/core/column/column_variant.h
@@ -286,6 +286,9 @@ private:
     // if `_max_subcolumns_count == 0`, all subcolumns are materialized.
     int32_t _max_subcolumns_count = 0;
 
+    // whether this column is configured for doc mode
+    bool _enable_doc_mode = false;
+
     // subcolumns count materialized from typed paths
     size_t typed_path_count = 0;
 
@@ -298,15 +301,16 @@ public:
 private:
     friend class COWHelper<IColumn, ColumnVariant>;
     // always create root: data type nothing
-    explicit ColumnVariant(int32_t max_subcolumns_count);
+    explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
 
     // always create root: data type nothing
-    explicit ColumnVariant(int32_t max_subcolumns_count, size_t size);
+    explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode, 
size_t size);
 
-    explicit ColumnVariant(int32_t max_subcolumns_count, DataTypePtr root_type,
-                           MutableColumnPtr&& root_column);
+    explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
+                           DataTypePtr root_type, MutableColumnPtr&& 
root_column);
 
-    explicit ColumnVariant(int32_t max_subcolumns_count, Subcolumns&& 
subcolumns_);
+    explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
+                           Subcolumns&& subcolumns_);
 
 public:
     ~ColumnVariant() override = default;
@@ -381,6 +385,8 @@ public:
 
     int32_t max_subcolumns_count() const { return _max_subcolumns_count; }
 
+    bool enable_doc_mode() const { return _enable_doc_mode; }
+
     /// Adds a subcolumn from existing IColumn.
     bool add_sub_column(const PathInData& key, MutableColumnPtr&& subcolumn, 
DataTypePtr type);
 
diff --git a/be/src/core/data_type/data_type_factory.cpp 
b/be/src/core/data_type/data_type_factory.cpp
index 6cbec46ae30..dd4b4a98ee9 100644
--- a/be/src/core/data_type/data_type_factory.cpp
+++ b/be/src/core/data_type/data_type_factory.cpp
@@ -104,7 +104,8 @@ DataTypePtr DataTypeFactory::create_data_type(const 
TabletColumn& col_desc, bool
         }
         nested = std::make_shared<DataTypeStruct>(dataTypes, names);
     } else if (col_desc.type() == FieldType::OLAP_FIELD_TYPE_VARIANT) {
-        nested = 
std::make_shared<DataTypeVariant>(col_desc.variant_max_subcolumns_count());
+        nested = 
std::make_shared<DataTypeVariant>(col_desc.variant_max_subcolumns_count(),
+                                                   
col_desc.variant_enable_doc_mode());
     } else {
         nested = _create_primitive_data_type(col_desc.type(), 
col_desc.precision(), col_desc.frac(),
                                              col_desc.length());
@@ -175,7 +176,7 @@ DataTypePtr 
DataTypeFactory::_create_primitive_data_type(const FieldType& type,
         result = std::make_shared<DataTypeString>(-1, TYPE_STRING);
         break;
     case FieldType::OLAP_FIELD_TYPE_VARIANT:
-        result = std::make_shared<DataTypeVariant>(0);
+        result = std::make_shared<DataTypeVariant>(0, false);
         break;
     case FieldType::OLAP_FIELD_TYPE_JSONB:
         result = std::make_shared<DataTypeJsonb>();
@@ -242,7 +243,8 @@ DataTypePtr DataTypeFactory::create_data_type(const 
PColumnMeta& pcolumn) {
         nested = std::make_shared<DataTypeString>();
         break;
     case PGenericType::VARIANT:
-        nested = 
std::make_shared<DataTypeVariant>(pcolumn.variant_max_subcolumns_count());
+        nested = 
std::make_shared<DataTypeVariant>(pcolumn.variant_max_subcolumns_count(),
+                                                   
pcolumn.variant_enable_doc_mode());
         break;
     case PGenericType::JSONB:
         nested = std::make_shared<DataTypeJsonb>();
@@ -447,7 +449,7 @@ DataTypePtr DataTypeFactory::create_data_type(const 
PrimitiveType primitive_type
         nested = std::make_shared<DataTypeFloat64>();
         break;
     case TYPE_VARIANT:
-        nested = std::make_shared<DataTypeVariant>(0);
+        nested = std::make_shared<DataTypeVariant>(0, false);
         break;
     case TYPE_STRING:
     case TYPE_CHAR:
@@ -521,10 +523,12 @@ DataTypePtr DataTypeFactory::create_data_type(const 
std::vector<TTypeNode>& type
         if (scalar_type.type == TPrimitiveType::VARIANT) {
             DCHECK(scalar_type.variant_max_subcolumns_count >= 0)
                     << "count is: " << 
scalar_type.variant_max_subcolumns_count;
-            return is_nullable ? 
make_nullable(std::make_shared<DataTypeVariant>(
-                                         
scalar_type.variant_max_subcolumns_count))
-                               : std::make_shared<DataTypeVariant>(
-                                         
scalar_type.variant_max_subcolumns_count);
+            bool doc_mode = scalar_type.__isset.variant_enable_doc_mode
+                                    ? scalar_type.variant_enable_doc_mode
+                                    : false;
+            auto dt = 
std::make_shared<DataTypeVariant>(scalar_type.variant_max_subcolumns_count,
+                                                        doc_mode);
+            return is_nullable ? make_nullable(dt) : dt;
         }
         return create_data_type(thrift_to_type(scalar_type.type), is_nullable,
                                 scalar_type.__isset.precision ? 
scalar_type.precision : 0,
@@ -627,7 +631,8 @@ DataTypePtr DataTypeFactory::create_data_type(
             // Do nothing
             nested = std::make_shared<DataTypeAggState>();
         } else if (primitive_type == TYPE_VARIANT) {
-            nested = 
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count());
+            nested = 
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count(),
+                                                       
node.variant_enable_doc_mode());
         } else {
             return create_data_type(primitive_type, is_nullable,
                                     scalar_type.has_precision() ? 
scalar_type.precision() : 0,
@@ -668,7 +673,8 @@ DataTypePtr DataTypeFactory::create_data_type(
         break;
     }
     case TTypeNodeType::VARIANT: {
-        nested = 
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count());
+        nested = 
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count(),
+                                                   
node.variant_enable_doc_mode());
         break;
     }
     default:
diff --git a/be/src/core/data_type/data_type_variant.cpp 
b/be/src/core/data_type/data_type_variant.cpp
index f3b896fea64..8a434d3b7f5 100644
--- a/be/src/core/data_type/data_type_variant.cpp
+++ b/be/src/core/data_type/data_type_variant.cpp
@@ -46,19 +46,15 @@ class IColumn;
 namespace doris {
 #include "common/compile_check_begin.h"
 
-DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count)
-        : _max_subcolumns_count(max_subcolumns_count) {
-    name = fmt::format("Variant(max subcolumns count = {})", 
max_subcolumns_count);
+DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count, bool 
enable_doc_mode)
+        : _max_subcolumns_count(max_subcolumns_count), 
_enable_doc_mode(enable_doc_mode) {
+    name = fmt::format("Variant(max subcolumns count = {}, enable doc mode = 
{})",
+                       max_subcolumns_count, enable_doc_mode);
 }
 bool DataTypeVariant::equals(const IDataType& rhs) const {
     auto rhs_type = typeid_cast<const DataTypeVariant*>(&rhs);
-    if (rhs_type && _max_subcolumns_count != 
rhs_type->variant_max_subcolumns_count()) {
-        VLOG_DEBUG << "_max_subcolumns_count is" << _max_subcolumns_count
-                   << "rhs_type->variant_max_subcolumns_count()"
-                   << rhs_type->variant_max_subcolumns_count();
-        return false;
-    }
-    return rhs_type && _max_subcolumns_count == 
rhs_type->variant_max_subcolumns_count();
+    return rhs_type && _max_subcolumns_count == 
rhs_type->variant_max_subcolumns_count() &&
+           _enable_doc_mode == rhs_type->enable_doc_mode();
 }
 
 int64_t DataTypeVariant::get_uncompressed_serialized_bytes(const IColumn& 
column,
@@ -236,10 +232,11 @@ const char* DataTypeVariant::deserialize(const char* buf, 
MutableColumnPtr* colu
 void DataTypeVariant::to_pb_column_meta(PColumnMeta* col_meta) const {
     IDataType::to_pb_column_meta(col_meta);
     col_meta->set_variant_max_subcolumns_count(_max_subcolumns_count);
+    col_meta->set_variant_enable_doc_mode(_enable_doc_mode);
 }
 
 MutableColumnPtr DataTypeVariant::create_column() const {
-    return ColumnVariant::create(_max_subcolumns_count);
+    return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
 }
 
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/core/data_type/data_type_variant.h 
b/be/src/core/data_type/data_type_variant.h
index fd59733f452..d4353b9c858 100644
--- a/be/src/core/data_type/data_type_variant.h
+++ b/be/src/core/data_type/data_type_variant.h
@@ -48,13 +48,14 @@ namespace doris {
 class DataTypeVariant : public IDataType {
 private:
     int32_t _max_subcolumns_count = 0;
+    bool _enable_doc_mode = false;
     std::string name = "Variant";
 
 public:
     static constexpr PrimitiveType PType = TYPE_VARIANT;
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_VARIANT; }
     DataTypeVariant() = default;
-    DataTypeVariant(int32_t max_subcolumns_count);
+    DataTypeVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
     String do_get_name() const override { return name; }
     const std::string get_family_name() const override { return "Variant"; }
 
@@ -81,8 +82,11 @@ public:
     };
     void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType* 
scalar_type) const override {
         node->set_type(TTypeNodeType::VARIANT);
+        node->set_variant_max_subcolumns_count(_max_subcolumns_count);
+        node->set_variant_enable_doc_mode(_enable_doc_mode);
     }
     void to_pb_column_meta(PColumnMeta* col_meta) const override;
     int32_t variant_max_subcolumns_count() const { return 
_max_subcolumns_count; }
+    bool enable_doc_mode() const { return _enable_doc_mode; }
 };
 } // namespace doris
diff --git a/be/src/exec/common/variant_util.cpp 
b/be/src/exec/common/variant_util.cpp
index 7b271cf3ec7..d604b7b47f1 100644
--- a/be/src/exec/common/variant_util.cpp
+++ b/be/src/exec/common/variant_util.cpp
@@ -151,7 +151,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const 
DataTypePtr& type, Co
         CHECK(arg.column->is_nullable());
         auto to_type = remove_nullable(type);
         const auto& data_type_object = assert_cast<const 
DataTypeVariant&>(*to_type);
-        auto variant = 
ColumnVariant::create(data_type_object.variant_max_subcolumns_count());
+        auto variant = 
ColumnVariant::create(data_type_object.variant_max_subcolumns_count(),
+                                             
data_type_object.enable_doc_mode());
 
         variant->create_root(arg.type, arg.column->assume_mutable());
         ColumnPtr nullable = ColumnNullable::create(
@@ -225,8 +226,9 @@ void get_column_by_type(const DataTypePtr& data_type, const 
std::string& name, T
         return;
     }
     if (data_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) {
-        column.set_variant_max_subcolumns_count(assert_cast<const 
DataTypeVariant*>(data_type.get())
-                                                        
->variant_max_subcolumns_count());
+        const auto* dt_variant = assert_cast<const 
DataTypeVariant*>(data_type.get());
+        
column.set_variant_max_subcolumns_count(dt_variant->variant_max_subcolumns_count());
+        column.set_variant_enable_doc_mode(dt_variant->enable_doc_mode());
         return;
     }
     // size is not fixed when type is string or json
@@ -989,6 +991,7 @@ void 
VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
             subcolumn.set_aggregation_method(parent_column->aggregation());
             subcolumn.set_variant_max_subcolumns_count(
                     parent_column->variant_max_subcolumns_count());
+            
subcolumn.set_variant_enable_doc_mode(parent_column->variant_enable_doc_mode());
             subcolumn.set_is_nullable(true);
             output_schema->append_column(subcolumn);
             VLOG_DEBUG << "append sub column " << subpath << " data type "
@@ -1097,6 +1100,7 @@ Status 
VariantCompactionUtil::get_extended_compaction_schema(
                 TabletColumn doc_value_bucket_column = 
create_doc_value_column(*column, b);
                 
doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
                 doc_value_bucket_column.set_is_nullable(false);
+                doc_value_bucket_column.set_variant_enable_doc_mode(true);
                 output_schema->append_column(doc_value_bucket_column);
             }
             continue;
@@ -1782,6 +1786,7 @@ void parse_json_to_variant_impl(IColumn& column, const 
char* src, size_t length,
         }
         break;
     case ParseConfig::ParseTo::OnlyDocValueColumn: {
+        CHECK(column_variant.enable_doc_mode()) << "OnlyDocValueColumn 
requires doc mode enabled";
         std::vector<size_t> doc_item_indexes;
         doc_item_indexes.reserve(paths.size());
         phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
@@ -1977,7 +1982,7 @@ Status _parse_and_materialize_variant_columns(Block& 
block,
         }
 
         if (scalar_root_column->is_column_string()) {
-            variant_column = ColumnVariant::create(0);
+            variant_column = ColumnVariant::create(0, var.enable_doc_mode());
             parse_json_to_variant(*variant_column.get(),
                                   assert_cast<const 
ColumnString&>(*scalar_root_column),
                                   configs[i]);
diff --git a/be/src/exec/scan/olap_scanner.cpp 
b/be/src/exec/scan/olap_scanner.cpp
index 4ac4b2f3e0d..595df071716 100644
--- a/be/src/exec/scan/olap_scanner.cpp
+++ b/be/src/exec/scan/olap_scanner.cpp
@@ -522,11 +522,12 @@ Status OlapScanner::_init_variant_columns() {
         if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) 
{
             // Such columns are not exist in frontend schema info, so we need 
to
             // add them into tablet_schema for later column indexing.
+            const auto& dt_variant =
+                    assert_cast<const 
DataTypeVariant&>(*remove_nullable(slot->type()));
             TabletColumn subcol = 
TabletColumn::create_materialized_variant_column(
                     
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(),
                     slot->column_paths(), slot->col_unique_id(),
-                    assert_cast<const 
DataTypeVariant&>(*remove_nullable(slot->type()))
-                            .variant_max_subcolumns_count());
+                    dt_variant.variant_max_subcolumns_count(), 
dt_variant.enable_doc_mode());
             if (tablet_schema->field_index(*subcol.path_info_ptr()) < 0) {
                 tablet_schema->append_column(subcol, 
TabletSchema::ColumnType::VARIANT);
             }
diff --git a/be/src/exprs/function/array/function_array_utils.cpp 
b/be/src/exprs/function/array/function_array_utils.cpp
index 4ce3cc4a737..29776153d17 100644
--- a/be/src/exprs/function/array/function_array_utils.cpp
+++ b/be/src/exprs/function/array/function_array_utils.cpp
@@ -58,7 +58,7 @@ bool extract_column_array_info(const IColumn& src, 
ColumnArrayExecutionData& dat
     if (data.output_as_variant &&
         data.nested_type->get_primitive_type() != PrimitiveType::TYPE_VARIANT) 
{
         // set variant root column/type to from column/type
-        auto variant = ColumnVariant::create(true /*always nullable*/);
+        auto variant = ColumnVariant::create(0, data.variant_enable_doc_mode);
         auto nullable_nested_type = make_nullable(data.nested_type);
         auto nullable_col = make_nullable(data.nested_col);
         variant->create_root(nullable_nested_type, 
std::move(*nullable_col).mutate());
diff --git a/be/src/exprs/function/array/function_array_utils.h 
b/be/src/exprs/function/array/function_array_utils.h
index 8b57b4b0e9f..f13791c8cdf 100644
--- a/be/src/exprs/function/array/function_array_utils.h
+++ b/be/src/exprs/function/array/function_array_utils.h
@@ -57,6 +57,8 @@ public:
     DataTypePtr nested_type = nullptr;
     // wrap the nested column as variant column
     bool output_as_variant = false;
+    // propagate enable_doc_mode when wrapping as variant
+    bool variant_enable_doc_mode = false;
 
     ColumnArrayMutableData to_mutable_data() const {
         ColumnArrayMutableData dst;
diff --git a/be/src/exprs/function/cast/cast_to_variant.h 
b/be/src/exprs/function/cast/cast_to_variant.h
index c145fbee987..db10d8cd372 100644
--- a/be/src/exprs/function/cast/cast_to_variant.h
+++ b/be/src/exprs/function/cast/cast_to_variant.h
@@ -128,7 +128,12 @@ struct CastToVariant {
         const auto& from_type = col_with_type_and_name.type;
         const auto& col_from = col_with_type_and_name.column;
         // set variant root column/type to from column/type
-        auto variant = ColumnVariant::create(true /*always nullable*/);
+        const auto& data_type_to = block.get_by_position(result).type;
+        const auto* variant_type =
+                typeid_cast<const 
DataTypeVariant*>(remove_nullable(data_type_to).get());
+        auto variant = ColumnVariant::create(
+                variant_type ? variant_type->variant_max_subcolumns_count() : 
0,
+                variant_type ? variant_type->enable_doc_mode() : false);
         variant->create_root(from_type, col_from->assume_mutable());
         block.replace_by_position(result, std::move(variant));
         return Status::OK();
diff --git a/be/src/exprs/function/function_variant_element.cpp 
b/be/src/exprs/function/function_variant_element.cpp
index 7ea5c4f9258..27963fe30b9 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -77,7 +77,8 @@ public:
         auto arg_variant = remove_nullable(arguments[0]);
         const auto& data_type_object = assert_cast<const 
DataTypeVariant&>(*arg_variant);
         return make_nullable(
-                
std::make_shared<DataTypeVariant>(data_type_object.variant_max_subcolumns_count()));
+                
std::make_shared<DataTypeVariant>(data_type_object.variant_max_subcolumns_count(),
+                                                  
data_type_object.enable_doc_mode()));
     }
 
     // wrap variant column with nullable
@@ -199,7 +200,6 @@ private:
                                                       const PathInData& path,
                                                       
ColumnVariant::MutablePtr& target_ptr) {
         ColumnVariant::Subcolumn root {0, true, true};
-        // no root, no sparse column
         const auto& doc_value_data_map =
                 assert_cast<const 
ColumnMap&>(*src_ptr->get_doc_value_column());
         const auto& src_doc_value_data_offsets = 
doc_value_data_map.get_offsets();
@@ -207,11 +207,13 @@ private:
                 assert_cast<const 
ColumnString&>(doc_value_data_map.get_keys());
         const auto& src_doc_value_data_values =
                 assert_cast<const 
ColumnString&>(doc_value_data_map.get_values());
-        auto& sparse_data_offsets =
-                
assert_cast<ColumnMap&>(*target_ptr->get_sparse_column()->assume_mutable())
+        // Write extracted data into target's doc_value column (not sparse) to 
preserve
+        // doc mode invariant: doc_mode columns must not have sparse data.
+        auto& doc_value_offsets =
+                
assert_cast<ColumnMap&>(*target_ptr->get_doc_value_column()->assume_mutable())
                         .get_offsets();
-        auto [sparse_data_paths, sparse_data_values] =
-                target_ptr->get_sparse_data_paths_and_values();
+        auto [doc_value_paths, doc_value_values] =
+                target_ptr->get_doc_value_data_paths_and_values();
         StringRef prefix_ref(path.get_path());
         std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
         for (size_t i = 0; i != src_doc_value_data_offsets.size(); ++i) {
@@ -225,33 +227,26 @@ private:
                 if (!nested_path.starts_with(path_prefix)) {
                     break;
                 }
-                // Don't include path that is equal to the prefix.
                 if (nested_path.size() != path_prefix.size()) {
                     auto sub_path_optional = get_sub_path(nested_path, 
path_prefix);
                     if (!sub_path_optional.has_value()) {
                         continue;
                     }
                     std::string_view sub_path = *sub_path_optional;
-                    sparse_data_paths->insert_data(sub_path.data(), 
sub_path.size());
-                    sparse_data_values->insert_from(src_doc_value_data_values, 
lower_bound_index);
+                    doc_value_paths->insert_data(sub_path.data(), 
sub_path.size());
+                    doc_value_values->insert_from(src_doc_value_data_values, 
lower_bound_index);
                 } else {
-                    // insert into root column, example:  access v['b'] and b 
is in sparse column
-                    // data example:
-                    // {"b" : 123}
-                    // {"b" : {"c" : 456}}
-                    // b maybe in sparse column, and b.c is in subolumn, put 
`b` into root column to distinguish
-                    // from "" which is empty path and root
                     
root.deserialize_from_binary_column(&src_doc_value_data_values,
                                                         lower_bound_index);
                 }
             }
-            if (root.size() == sparse_data_offsets.size()) {
+            if (root.size() == doc_value_offsets.size()) {
                 root.insert_default();
             }
-            sparse_data_offsets.push_back(sparse_data_paths->size());
+            doc_value_offsets.push_back(doc_value_paths->size());
         }
         target_ptr->get_subcolumns().create_root(root);
-        
target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size());
+        
target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size());
         target_ptr->set_num_rows(src_ptr->size());
     }
 
@@ -259,7 +254,7 @@ private:
                                      ColumnPtr* result) {
         std::string field_name = index_column->get_data_at(0).to_string();
         if (src.empty()) {
-            *result = ColumnVariant::create(src.max_subcolumns_count());
+            *result = ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode());
             // src subcolumns empty but src row count may not be 0
             (*result)->assume_mutable()->insert_many_defaults(src.size());
             // ColumnVariant should be finalized before parsing, finalize 
maybe modify original column structure
@@ -286,7 +281,7 @@ private:
                     result_column->insert_default();
                 }
             }
-            *result = ColumnVariant::create(src.max_subcolumns_count(), type,
+            *result = ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode(), type,
                                             std::move(result_column));
             (*result)->assume_mutable()->finalize();
             return Status::OK();
@@ -296,7 +291,8 @@ private:
             PathInData path(field_name);
             ColumnVariant::Subcolumns subcolumns = 
mutable_ptr->get_subcolumns();
             const auto* node = subcolumns.find_exact(path);
-            MutableColumnPtr result_col = 
ColumnVariant::create(src.max_subcolumns_count());
+            MutableColumnPtr result_col =
+                    ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode());
             ColumnVariant::Subcolumns new_subcolumns;
 
             if (node != nullptr) {
@@ -321,19 +317,22 @@ private:
                     new_subcolumns.create_root(ColumnVariant::Subcolumn {
                             
nodes[0]->data.get_finalized_column_ptr()->assume_mutable(),
                             nodes[0]->data.get_least_common_type(), true, 
true});
-                    auto container = 
ColumnVariant::create(src.max_subcolumns_count(),
-                                                           
std::move(new_subcolumns));
+                    auto container =
+                            ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode(),
+                                                  std::move(new_subcolumns));
                     result_col->insert_range_from(*container, 0, 
container->size());
                 } else {
-                    auto container = 
ColumnVariant::create(src.max_subcolumns_count(),
-                                                           
std::move(new_subcolumns));
+                    auto container =
+                            ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode(),
+                                                  std::move(new_subcolumns));
                     container->clear_sparse_column();
                     _extract_sparse_column_from_source(mutable_ptr, path, 
container);
                     result_col->insert_range_from(*container, 0, 
container->size());
                 }
             } else {
-                auto container = 
ColumnVariant::create(src.max_subcolumns_count(),
-                                                       
std::move(new_subcolumns));
+                auto container =
+                        ColumnVariant::create(src.max_subcolumns_count(), 
src.enable_doc_mode(),
+                                              std::move(new_subcolumns));
                 const auto& sparse_offsets = 
mutable_ptr->serialized_sparse_column_offsets();
                 if (sparse_offsets.back() == sparse_offsets[-1]) {
                     _extract_doc_value_column_from_source(mutable_ptr, path, 
container);
diff --git a/be/src/exprs/table_function/vexplode.cpp 
b/be/src/exprs/table_function/vexplode.cpp
index 8a0b961bb33..ae52cb9175b 100644
--- a/be/src/exprs/table_function/vexplode.cpp
+++ b/be/src/exprs/table_function/vexplode.cpp
@@ -49,6 +49,7 @@ Status VExplodeTableFunction::_process_init_variant(Block* 
block, int value_colu
     auto& variant_column = 
assert_cast<ColumnVariant&>(*(column->assume_mutable()));
     variant_column.finalize();
     _detail.output_as_variant = true;
+    _detail.variant_enable_doc_mode = variant_column.enable_doc_mode();
     if (!variant_column.is_null_root()) {
         _array_column = variant_column.get_root();
         // We need to wrap the output nested column within a variant column.
diff --git a/be/src/exprs/table_function/vexplode_v2.cpp 
b/be/src/exprs/table_function/vexplode_v2.cpp
index 2894453948a..964bc8f2d6e 100644
--- a/be/src/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/exprs/table_function/vexplode_v2.cpp
@@ -56,6 +56,7 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* 
block, int value_co
     auto& variant_column = 
assert_cast<ColumnVariant&>(*(column->assume_mutable()));
     variant_column.finalize();
     _multi_detail[children_column_idx].output_as_variant = true;
+    _multi_detail[children_column_idx].variant_enable_doc_mode = 
variant_column.enable_doc_mode();
     if (!variant_column.is_null_root()) {
         _array_columns[children_column_idx] = variant_column.get_root();
         // We need to wrap the output nested column within a variant column.
diff --git a/be/src/storage/segment/segment.cpp 
b/be/src/storage/segment/segment.cpp
index fdeeb3e752c..ec0b706bb54 100644
--- a/be/src/storage/segment/segment.cpp
+++ b/be/src/storage/segment/segment.cpp
@@ -939,11 +939,12 @@ Status Segment::seek_and_read_by_rowid(const 
TabletSchema& schema, SlotDescripto
         // if segment cache miss, column reader will be created to make sure 
the variant column result not coredump
         RETURN_IF_ERROR(_create_column_meta_once(storage_read_options.stats));
 
+        const auto& dt_variant =
+                assert_cast<const 
DataTypeVariant&>(*remove_nullable(slot->type()));
         TabletColumn column = TabletColumn::create_materialized_variant_column(
                 schema.column_by_uid(slot->col_unique_id()).name_lower_case(), 
slot->column_paths(),
-                slot->col_unique_id(),
-                assert_cast<const 
DataTypeVariant&>(*remove_nullable(slot->type()))
-                        .variant_max_subcolumns_count());
+                slot->col_unique_id(), 
dt_variant.variant_max_subcolumns_count(),
+                dt_variant.enable_doc_mode());
         auto storage_type = get_data_type_of(column, storage_read_options);
         MutableColumnPtr file_storage_column = storage_type->create_column();
         DCHECK(storage_type != nullptr);
diff --git a/be/src/storage/segment/segment_writer.cpp 
b/be/src/storage/segment/segment_writer.cpp
index 3f5fae5792c..deef80e8e20 100644
--- a/be/src/storage/segment/segment_writer.cpp
+++ b/be/src/storage/segment/segment_writer.cpp
@@ -171,6 +171,7 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, 
uint32_t column_id,
     meta->set_be_exec_version(column.get_be_exec_version());
     if (column.is_variant_type()) {
         
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+        meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
     }
 }
 
diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp 
b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
index 8c219b2f982..e19804899a5 100644
--- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
+++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
@@ -226,7 +226,7 @@ Status HierarchicalDataIterator::_process_nested_columns(
         const auto* base_array =
                 
check_and_get_column<ColumnArray>(*remove_nullable(entry.second[0].column));
         MutableColumnPtr nested_object =
-                ColumnVariant::create(0 /*no sparse column*/, 
base_array->get_data().size());
+                ColumnVariant::create(0, false, base_array->get_data().size());
         MutableColumnPtr offset = 
base_array->get_offsets_ptr()->assume_mutable();
         auto* nested_object_ptr = 
assert_cast<ColumnVariant*>(nested_object.get());
         // flatten nested arrays
@@ -271,7 +271,8 @@ Status HierarchicalDataIterator::_process_nested_columns(
 }
 
 Status HierarchicalDataIterator::_init_container(MutableColumnPtr& container, 
size_t nrows,
-                                                 int32_t max_subcolumns_count) 
{
+                                                 int32_t max_subcolumns_count,
+                                                 bool enable_doc_mode) {
     // build variant as container
     // add root first
     if (_path.get_parts().empty() && _root_reader) {
@@ -290,12 +291,13 @@ Status 
HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si
         auto nullable_column = make_nullable(column->get_ptr());
         auto type = make_nullable(_root_reader->type);
         // make sure the root type is nullable
-        container = ColumnVariant::create(max_subcolumns_count, type,
+        container = ColumnVariant::create(max_subcolumns_count, 
enable_doc_mode, type,
                                           nullable_column->assume_mutable());
     } else {
         DataTypePtr root_type = std::make_shared<DataTypeNothing>();
         auto column = ColumnNothing::create(nrows);
-        container = ColumnVariant::create(max_subcolumns_count, root_type, 
std::move(column));
+        container = ColumnVariant::create(max_subcolumns_count, 
enable_doc_mode, root_type,
+                                          std::move(column));
     }
 
     auto& container_variant = assert_cast<ColumnVariant&>(*container);
@@ -334,7 +336,7 @@ Status 
HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si
     RETURN_IF_ERROR(_process_nested_columns(container_variant, 
nested_subcolumns, nrows));
     {
         
SCOPED_RAW_TIMER(&_stats->variant_fill_path_from_sparse_column_timer_ns);
-        RETURN_IF_ERROR(_process_sparse_column(container_variant, nrows));
+        RETURN_IF_ERROR(_process_binary_column(container_variant, nrows));
     }
 
     container_variant.set_num_rows(nrows);
@@ -352,7 +354,7 @@ static std::optional<std::string_view> get_sub_path(const 
std::string_view& path
     return path.substr(prefix.size() + 1);
 }
 
-Status HierarchicalDataIterator::_process_sparse_column(ColumnVariant& 
container_variant,
+Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& 
container_variant,
                                                         size_t nrows) {
     container_variant.clear_sparse_column();
     // process sparse column
@@ -368,6 +370,58 @@ Status 
HierarchicalDataIterator::_process_sparse_column(ColumnVariant& container
         }
         ENABLE_CHECK_CONSISTENCY(&container_variant);
         return Status::OK();
+    } else if (_read_type == ReadType::DOC_VALUE_COLUMN) {
+        // Doc mode hierarchical read: extract sub-paths matching prefix from 
source
+        // doc_value and write them (with prefix stripped) into container's 
doc_value.
+        // No subcolumn materialization — preserves doc-mode invariant.
+        const auto& src_map = assert_cast<const 
ColumnMap&>(*_binary_column_reader->column);
+        const auto& src_offsets = src_map.get_offsets();
+        const auto& src_paths = assert_cast<const 
ColumnString&>(src_map.get_keys());
+        const auto& src_values = assert_cast<const 
ColumnString&>(src_map.get_values());
+
+        // Clear pre-initialized doc_value offsets (created by ColumnVariant 
ctor with num_rows)
+        container_variant.get_doc_value_column()->assume_mutable()->clear();
+        auto [dst_paths, dst_values] = 
container_variant.get_doc_value_data_paths_and_values();
+        auto& dst_offsets = 
container_variant.serialized_doc_value_column_offsets();
+
+        StringRef prefix_ref(_path.get_path());
+        std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
+
+        for (size_t i = 0; i != src_offsets.size(); ++i) {
+            size_t start = src_offsets[ssize_t(i) - 1];
+            size_t end = src_offsets[ssize_t(i)];
+            size_t lower_bound_index = 
ColumnVariant::find_path_lower_bound_in_sparse_data(
+                    prefix_ref, src_paths, start, end);
+            for (; lower_bound_index != end; ++lower_bound_index) {
+                auto path_ref = src_paths.get_data_at(lower_bound_index);
+                std::string_view path(path_ref.data, path_ref.size);
+                if (!path.starts_with(path_prefix)) {
+                    break;
+                }
+                if (path.size() == path_prefix.size()) {
+                    // Exact match (e.g. querying v['obj'] and path is 'obj') 
→ root value
+                    if (container_variant.is_null_root()) {
+                        
container_variant.get_subcolumn({})->resize(dst_offsets.size());
+                    }
+                    
container_variant.get_subcolumn({})->deserialize_from_binary_column(
+                            &src_values, lower_bound_index);
+                    continue;
+                }
+                auto sub_path_optional = get_sub_path(path, path_prefix);
+                if (!sub_path_optional.has_value()) {
+                    continue;
+                }
+                std::string_view sub_path = *sub_path_optional;
+                dst_paths->insert_data(sub_path.data(), sub_path.size());
+                dst_values->insert_from(src_values, lower_bound_index);
+            }
+            if (!container_variant.is_null_root() &&
+                container_variant.get_subcolumn({})->size() == 
dst_offsets.size()) {
+                container_variant.get_subcolumn({})->insert_default();
+            }
+            dst_offsets.push_back(dst_paths->size());
+        }
+        container_variant.get_sparse_column()->assume_mutable()->resize(nrows);
     } else {
         const auto& offsets =
                 assert_cast<const 
ColumnMap&>(*_binary_column_reader->column).get_offsets();
diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.h 
b/be/src/storage/segment/variant/hierarchical_data_iterator.h
index eb5e29093e3..8c0e3366b1e 100644
--- a/be/src/storage/segment/variant/hierarchical_data_iterator.h
+++ b/be/src/storage/segment/variant/hierarchical_data_iterator.h
@@ -122,14 +122,15 @@ private:
             ColumnVariant& container_variant,
             const std::map<PathInData, PathsWithColumnAndType>& 
nested_subcolumns, size_t nrows);
 
-    Status _process_sparse_column(ColumnVariant& container_variant, size_t 
nrows);
+    Status _process_binary_column(ColumnVariant& container_variant, size_t 
nrows);
 
     // 1. add root column
     // 2. collect path for subcolumns and nested subcolumns
     // 3. init container with subcolumns
     // 4. init container with nested subcolumns
     // 5. init container with sparse column
-    Status _init_container(MutableColumnPtr& container, size_t nrows, int 
max_subcolumns_count);
+    Status _init_container(MutableColumnPtr& container, size_t nrows, int 
max_subcolumns_count,
+                           bool enable_doc_mode);
 
     // clear all subcolumns's column data for next batch read
     // set null map for nullable column
@@ -170,7 +171,8 @@ private:
         }
 
         MutableColumnPtr container;
-        RETURN_IF_ERROR(_init_container(container, nrows, 
variant.max_subcolumns_count()));
+        RETURN_IF_ERROR(_init_container(container, nrows, 
variant.max_subcolumns_count(),
+                                        variant.enable_doc_mode()));
         auto& container_variant = assert_cast<ColumnVariant&>(*container);
         variant.insert_range_from(container_variant, 0, nrows);
 
diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp 
b/be/src/storage/segment/variant/variant_column_reader.cpp
index 93f896c80b5..9d06b6ebb84 100644
--- a/be/src/storage/segment/variant/variant_column_reader.cpp
+++ b/be/src/storage/segment/variant/variant_column_reader.cpp
@@ -309,8 +309,10 @@ Result<BinaryColumnCacheSPtr> 
VariantColumnReader::_get_binary_column_cache(
 DataTypePtr create_variant_type(const TabletColumn& target_col) {
     return target_col.is_nullable()
                    ? make_nullable(std::make_shared<DataTypeVariant>(
-                             target_col.variant_max_subcolumns_count()))
-                   : 
std::make_shared<DataTypeVariant>(target_col.variant_max_subcolumns_count());
+                             target_col.variant_max_subcolumns_count(),
+                             target_col.variant_enable_doc_mode()))
+                   : 
std::make_shared<DataTypeVariant>(target_col.variant_max_subcolumns_count(),
+                                                       
target_col.variant_enable_doc_mode());
 }
 
 Status VariantColumnReader::_build_read_plan_flat_leaves(
@@ -782,8 +784,14 @@ Status VariantColumnReader::_build_read_plan(ReadPlan* 
plan, const TabletColumn&
     if (_has_prefix_path_unlocked(relative_path)) {
         // Example {"b" : {"c":456,"e":7.111}}
         // b.c is sparse column, b.e is subcolumn, so b is both the prefix of 
sparse column and
-        // subcolumn
-        plan->kind = ReadKind::HIERARCHICAL;
+        // subcolumn.
+        // Doc mode: prefer extracting hierarchy from doc_value column to 
preserve doc mode
+        // invariant (root-only + doc_value). Non-doc mode: read from 
subcolumns + sparse.
+        if (target_col.variant_enable_doc_mode()) {
+            plan->kind = ReadKind::HIERARCHICAL_DOC;
+        } else {
+            plan->kind = ReadKind::HIERARCHICAL;
+        }
         plan->type = create_variant_type(target_col);
         plan->relative_path = relative_path;
         plan->node = node;
@@ -1417,7 +1425,7 @@ Status 
VariantRootColumnIterator::_process_root_column(MutableColumnPtr& dst,
     }
 
     // add root column to a tmp object column
-    auto tmp = ColumnVariant::create(0, root_column->size());
+    auto tmp = ColumnVariant::create(0, obj.enable_doc_mode(), 
root_column->size());
     auto& tmp_obj = assert_cast<ColumnVariant&>(*tmp);
     tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
     // 
tmp_obj.get_sparse_column()->assume_mutable()->insert_many_defaults(root_column->size());
diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp 
b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
index 3017881a4fb..4f87a1640b3 100644
--- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
+++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
@@ -84,6 +84,7 @@ void _init_column_meta(ColumnMetaPB* meta, uint32_t 
column_id, const TabletColum
     }
     if (column.is_variant_type()) {
         
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+        meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
     }
 }
 
@@ -1167,7 +1168,7 @@ Status VariantColumnWriterImpl::init() {
     if (_opts.rowset_ctx->write_type == DataWriteType::TYPE_DIRECT) {
         count = 0;
     }
-    _column = ColumnVariant::create(count);
+    _column = ColumnVariant::create(count, 
_tablet_column->variant_enable_doc_mode());
     return Status::OK();
 }
 
@@ -1589,7 +1590,7 @@ VariantSubcolumnWriter::VariantSubcolumnWriter(const 
ColumnWriterOptions& opts,
         : ColumnWriter(std::move(field), opts.meta->is_nullable(), opts.meta) {
     _tablet_column = column;
     _opts = opts;
-    _column = ColumnVariant::create(0);
+    _column = ColumnVariant::create(0, false);
 }
 
 Status VariantSubcolumnWriter::init() {
@@ -1715,7 +1716,7 @@ VariantDocCompactWriter::VariantDocCompactWriter(const 
ColumnWriterOptions& opts
         : ColumnWriter(std::move(field), opts.meta->is_nullable(), opts.meta) {
     _opts = opts;
     _tablet_column = column;
-    _column = ColumnVariant::create(0);
+    _column = ColumnVariant::create(0, false);
 }
 
 Status VariantDocCompactWriter::init() {
diff --git 
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h 
b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
index acc90eea121..7f2d63a0457 100644
--- a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
+++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
@@ -55,7 +55,8 @@ private:
     Status _set_doc_value_into_variant(MutableColumnPtr& dst, 
MutableColumnPtr&& doc_value_column,
                                        size_t count) const {
         auto& variant = assert_cast<ColumnVariant&>(*dst);
-        MutableColumnPtr container = 
ColumnVariant::create(variant.max_subcolumns_count(), count);
+        MutableColumnPtr container = 
ColumnVariant::create(variant.max_subcolumns_count(),
+                                                           
variant.enable_doc_mode(), count);
         auto& container_variant = assert_cast<ColumnVariant&>(*container);
         container_variant.set_doc_value_column(std::move(doc_value_column));
         variant.insert_range_from(container_variant, 0, count);
diff --git 
a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp 
b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
index d6a2817b6d8..0b993ac8a88 100644
--- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
+++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
@@ -109,7 +109,7 @@ Status 
VariantStreamingCompactionWriter::_append_input_from_raw(const uint8_t**
 Status VariantStreamingCompactionWriter::_append_input(const ColumnVariant& 
src, size_t row_pos,
                                                        size_t num_rows,
                                                        const uint8_t* 
outer_null_map) {
-    auto chunk_variant = ColumnVariant::create(0);
+    auto chunk_variant = ColumnVariant::create(0, src.enable_doc_mode());
     chunk_variant->insert_range_from(src, row_pos, num_rows);
     RETURN_IF_ERROR(chunk_variant->sanitize());
     chunk_variant->finalize();
diff --git a/be/src/storage/segment/vertical_segment_writer.cpp 
b/be/src/storage/segment/vertical_segment_writer.cpp
index 215d069425f..a3728111fa2 100644
--- a/be/src/storage/segment/vertical_segment_writer.cpp
+++ b/be/src/storage/segment/vertical_segment_writer.cpp
@@ -174,6 +174,7 @@ void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB* 
meta, uint32_t colum
     }
     if (column.is_variant_type()) {
         
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+        meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
     }
     meta->set_result_is_nullable(column.get_result_is_nullable());
     meta->set_function_name(column.get_aggregation_name());
diff --git a/be/src/storage/tablet/tablet_meta.cpp 
b/be/src/storage/tablet/tablet_meta.cpp
index a183a391cc4..4bacda4ad40 100644
--- a/be/src/storage/tablet/tablet_meta.cpp
+++ b/be/src/storage/tablet/tablet_meta.cpp
@@ -551,8 +551,8 @@ void TabletMeta::init_column_from_tcolumn(uint32_t 
unique_id, const TColumn& tco
     if (tcolumn.__isset.variant_sparse_hash_shard_count) {
         
column->set_variant_sparse_hash_shard_count(tcolumn.variant_sparse_hash_shard_count);
     }
-    if (tcolumn.__isset.variant_enable_doc_mode) {
-        column->set_variant_enable_doc_mode(tcolumn.variant_enable_doc_mode);
+    if (tcolumn.column_type.__isset.variant_enable_doc_mode) {
+        
column->set_variant_enable_doc_mode(tcolumn.column_type.variant_enable_doc_mode);
     }
     if (tcolumn.__isset.variant_doc_materialization_min_rows) {
         column->set_variant_doc_materialization_min_rows(
diff --git a/be/src/storage/tablet/tablet_schema.cpp 
b/be/src/storage/tablet/tablet_schema.cpp
index ae0190a754c..15569cacdd0 100644
--- a/be/src/storage/tablet/tablet_schema.cpp
+++ b/be/src/storage/tablet/tablet_schema.cpp
@@ -689,7 +689,8 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
 TabletColumn TabletColumn::create_materialized_variant_column(const 
std::string& root,
                                                               const 
std::vector<std::string>& paths,
                                                               int32_t 
parent_unique_id,
-                                                              int32_t 
max_subcolumns_count) {
+                                                              int32_t 
max_subcolumns_count,
+                                                              bool 
enable_doc_mode) {
     TabletColumn subcol;
     subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
     subcol.set_is_nullable(true);
@@ -699,6 +700,7 @@ TabletColumn 
TabletColumn::create_materialized_variant_column(const std::string&
     subcol.set_path_info(path);
     subcol.set_name(path.get_path());
     subcol.set_variant_max_subcolumns_count(max_subcolumns_count);
+    subcol.set_variant_enable_doc_mode(enable_doc_mode);
     return subcol;
 }
 
diff --git a/be/src/storage/tablet/tablet_schema.h 
b/be/src/storage/tablet/tablet_schema.h
index 63a21e122cf..1fae1a9b8ec 100644
--- a/be/src/storage/tablet/tablet_schema.h
+++ b/be/src/storage/tablet/tablet_schema.h
@@ -132,7 +132,8 @@ public:
     static TabletColumn create_materialized_variant_column(const std::string& 
root,
                                                            const 
std::vector<std::string>& paths,
                                                            int32_t 
parent_unique_id,
-                                                           int32_t 
max_subcolumns_count);
+                                                           int32_t 
max_subcolumns_count,
+                                                           bool 
enable_doc_mode = false);
     bool has_default_value() const { return _has_default_value; }
     std::string default_value() const { return _default_value; }
     int32_t length() const { return _length; }
diff --git a/be/test/core/column/column_variant_test.cpp 
b/be/test/core/column/column_variant_test.cpp
index f473124c55d..2c81fe1a107 100644
--- a/be/test/core/column/column_variant_test.cpp
+++ b/be/test/core/column/column_variant_test.cpp
@@ -65,7 +65,7 @@ protected:
         test_data_dir = root_dir + "/be/test/data/vec/columns";
         test_result_dir = root_dir + "/be/test/expected_result/vec/columns";
 
-        column_variant = ColumnVariant::create(true);
+        column_variant = ColumnVariant::create(0, false);
         std::cout << dt_variant->get_name() << std::endl;
 
         load_json_columns_data();
@@ -116,7 +116,7 @@ protected:
 
     template <typename T>
     void column_common_test(T callback) {
-        callback(ColumnVariant(true), column_variant->get_ptr());
+        callback(ColumnVariant(0, false), column_variant->get_ptr());
     }
 
     void hash_common_test(
@@ -765,7 +765,7 @@ TEST_F(ColumnVariantTest, empty_inset_range_from) {
     EXPECT_EQ(src->size(), 6);
 
     // dst is an empty column
-    auto dst = ColumnVariant::create(5);
+    auto dst = ColumnVariant::create(5, false);
 
     // subcolumn->subcolumn          v.a v.b v.c v.f v.e
     dst->insert_range_from(*src, 0, 6);
@@ -951,7 +951,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
     // Test case 1: Insert from scalar variant source to empty destination
     {
         // Create source column with scalar values
-        auto src_column = ColumnVariant::create(true);
+        auto src_column = ColumnVariant::create(0, false);
         VariantUtil::insert_root_scalar_field(*src_column, 
Field::create_field<TYPE_INT>(123));
         VariantUtil::insert_root_scalar_field(*src_column, 
Field::create_field<TYPE_INT>(456));
         src_column->finalize();
@@ -960,7 +960,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
         EXPECT_EQ(src_column->size(), 2);
 
         // Create empty destination column
-        auto dst_column = ColumnVariant::create(true);
+        auto dst_column = ColumnVariant::create(0, false);
         EXPECT_EQ(dst_column->size(), 0);
 
         // Create indices
@@ -991,14 +991,14 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
     // Test case 2: Insert from scalar variant source to non-empty destination 
of same type
     {
         // Create source column with scalar values
-        auto src_column = ColumnVariant::create(true);
+        auto src_column = ColumnVariant::create(0, false);
         VariantUtil::insert_root_scalar_field(*src_column, 
Field::create_field<TYPE_INT>(123));
         VariantUtil::insert_root_scalar_field(*src_column, 
Field::create_field<TYPE_INT>(456));
         src_column->finalize();
         EXPECT_TRUE(src_column->is_scalar_variant());
 
         // Create destination column with same type
-        auto dst_column = ColumnVariant::create(true);
+        auto dst_column = ColumnVariant::create(0, false);
         VariantUtil::insert_root_scalar_field(*dst_column, 
Field::create_field<TYPE_INT>(789));
         dst_column->finalize();
         EXPECT_TRUE(dst_column->is_scalar_variant());
@@ -1027,7 +1027,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
     // Test case 3: Insert from non-scalar or different type source (fallback 
to try_insert)
     {
         // Create source column with object values (non-scalar)
-        auto src_column = ColumnVariant::create(true);
+        auto src_column = ColumnVariant::create(0, false);
 
         // Create a map with {"a": 123}
         Field field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
@@ -1050,7 +1050,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
         EXPECT_FALSE(src_column->is_scalar_variant());
 
         // Create destination column (empty)
-        auto dst_column = ColumnVariant::create(true);
+        auto dst_column = ColumnVariant::create(0, false);
 
         // Create indices
         std::vector<uint32_t> indices = {1, 0};
@@ -1173,7 +1173,7 @@ TEST_F(ColumnVariantTest, field_test) {
         }
     };
     ColumnVariant::MutablePtr obj;
-    obj = ColumnVariant::create(1);
+    obj = ColumnVariant::create(1, false);
     MutableColumns cols;
     cols.push_back(obj->get_ptr());
     const auto& json_file_obj = test_data_dir_json + 
"json_variant/object_boundary.jsonl";
@@ -1214,7 +1214,7 @@ TEST_F(ColumnVariantTest, serialize_one_row_to_string) {
     {
         // TEST SCALA_VARAINT
         // 1. create an empty variant column
-        auto v = ColumnVariant::create(true);
+        auto v = ColumnVariant::create(0, false);
         auto dt = 
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_STRING, 
0,
                                                                0);
         auto cs = dt->create_column();
@@ -1663,7 +1663,7 @@ TEST_F(ColumnVariantTest, get_subcolumn) {
 
 TEST_F(ColumnVariantTest, ensure_root_node_type) {
     ColumnVariant::MutablePtr obj;
-    obj = ColumnVariant::create(1);
+    obj = ColumnVariant::create(1, false);
     MutableColumns cols;
     cols.push_back(obj->get_ptr());
     const auto& json_file_obj = test_data_dir_json + 
"json_variant/object_boundary.jsonl";
@@ -2124,7 +2124,7 @@ TEST_F(ColumnVariantTest, 
find_path_lower_bound_in_sparse_data) {
         }
     };
     ColumnVariant::MutablePtr obj;
-    obj = ColumnVariant::create(1);
+    obj = ColumnVariant::create(1, false);
     MutableColumns cols;
     cols.push_back(obj->get_ptr());
     const auto& json_file_obj = test_data_dir_json + 
"json_variant/object_boundary.jsonl";
@@ -2137,7 +2137,7 @@ TEST_F(ColumnVariantTest, 
find_path_lower_bound_in_sparse_data) {
 // used in BinaryColumnExtractIterator::_fill_path_column
 TEST_F(ColumnVariantTest, fill_path_column_from_sparse_data) {
     ColumnVariant::MutablePtr obj;
-    obj = ColumnVariant::create(1);
+    obj = ColumnVariant::create(1, false);
     MutableColumns cols;
     cols.push_back(obj->get_ptr());
     const auto& json_file_obj = test_data_dir_json + 
"json_variant/object_boundary.jsonl";
@@ -2164,7 +2164,7 @@ TEST_F(ColumnVariantTest, 
fill_path_column_from_sparse_data) {
 
 TEST_F(ColumnVariantTest, not_finalized) {
     ColumnVariant::MutablePtr obj;
-    obj = ColumnVariant::create(1);
+    obj = ColumnVariant::create(1, false);
     MutableColumns cols;
     cols.push_back(obj->get_ptr());
 
@@ -2331,7 +2331,7 @@ TEST_F(ColumnVariantTest, array_field_operations) {
         {
             // Test wrapp_array_nullable
             // 1. create an empty variant column
-            auto variant = ColumnVariant::create(2);
+            auto variant = ColumnVariant::create(2, false);
 
             std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -2376,12 +2376,12 @@ TEST_F(ColumnVariantTest, assert_exception_happen) {
         dynamic_subcolumns.add(PathInData("v.b.d"), ColumnVariant::Subcolumn 
{0, true});
         dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn 
{0, true});
         std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() 
<< std::endl;
-        EXPECT_ANY_THROW(ColumnVariant::create(2, 
std::move(dynamic_subcolumns)));
+        EXPECT_ANY_THROW(ColumnVariant::create(2, false, 
std::move(dynamic_subcolumns)));
     }
 
     {
         // 1. create an empty variant column
-        auto variant = ColumnVariant::create(5);
+        auto variant = ColumnVariant::create(5, false);
 
         std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -2467,7 +2467,7 @@ TEST_F(ColumnVariantTest, try_insert_default_from_nested) 
{
                            ColumnVariant::Subcolumn {std::move(column), 
array_type, false, false});
     dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn {0, 
true});
     std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() << 
std::endl;
-    auto obj = ColumnVariant::create(5, std::move(dynamic_subcolumns));
+    auto obj = ColumnVariant::create(5, false, std::move(dynamic_subcolumns));
 
     for (auto& entry : obj->get_subcolumns()) {
         std::cout << "entry path: " << entry->path.get_path() << std::endl;
@@ -2504,7 +2504,7 @@ TEST_F(ColumnVariantTest, unnest) {
                            ColumnVariant::Subcolumn {std::move(nested_col),
                                                      
ColumnVariant::NESTED_TYPE, true, false});
     std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() << 
std::endl;
-    auto obj = ColumnVariant::create(2, std::move(dynamic_subcolumns));
+    auto obj = ColumnVariant::create(2, false, std::move(dynamic_subcolumns));
     obj->set_num_rows(2);
     EXPECT_TRUE(!obj->empty());
     std::cout << obj->size() << std::endl;
@@ -2513,7 +2513,7 @@ TEST_F(ColumnVariantTest, unnest) {
 
 TEST_F(ColumnVariantTest, path_in_data_builder_test) {
     // Create a ColumnVariant with nested subcolumns
-    auto variant = ColumnVariant::create(5);
+    auto variant = ColumnVariant::create(5, false);
 
     // Test case 1: Build a nested path with PathInDataBuilder
     {
@@ -3171,7 +3171,7 @@ TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
         col_arr->insert(an);
         col_arr->insert(an);
         MutableColumnPtr nested_object = ColumnVariant::create(
-                container_variant.max_subcolumns_count(), 
col_arr->get_data().size());
+                container_variant.max_subcolumns_count(), false, 
col_arr->get_data().size());
         MutableColumnPtr offset = 
col_arr->get_offsets_ptr()->assume_mutable(); // [3, 3, 4]
         auto* nested_object_ptr = 
assert_cast<ColumnVariant*>(nested_object.get());
         // flatten nested arrays
@@ -3203,9 +3203,9 @@ TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
 
     // Test is_empty_nested
     {
-        auto v = ColumnVariant::create(1);
+        auto v = ColumnVariant::create(1, false);
         auto sub_dt = make_nullable(std::make_unique<DataTypeArray>(
-                make_nullable(std::make_unique<DataTypeVariant>(1))));
+                make_nullable(std::make_unique<DataTypeVariant>(1, false))));
         auto sub_col = sub_dt->create_column();
 
         std::vector<std::pair<std::string, doris::Field>> data;
@@ -3468,7 +3468,7 @@ TEST_F(ColumnVariantTest, 
subcolumn_insert_range_from_test_advanced) {
 }
 
 TEST_F(ColumnVariantTest, test_variant_no_data_insert) {
-    auto variant = ColumnVariant::create(1);
+    auto variant = ColumnVariant::create(1, false);
     variant->insert_many_defaults(10);
     EXPECT_EQ(variant->size(), 10);
     EXPECT_TRUE(variant->only_have_default_values());
diff --git a/be/test/exec/common/schema_util_rowset_test.cpp 
b/be/test/exec/common/schema_util_rowset_test.cpp
index aa9c2fd13cd..cf99c982495 100644
--- a/be/test/exec/common/schema_util_rowset_test.cpp
+++ b/be/test/exec/common/schema_util_rowset_test.cpp
@@ -706,7 +706,7 @@ TEST_F(SchemaUtilRowsetTest, 
some_test_for_subcolumn_writer) {
     auto size = variant_subcolumn_writer->estimate_buffer_size();
     std::cout << "size: " << size << std::endl;
     // append data
-    auto insert_object = ColumnVariant::create(true);
+    auto insert_object = ColumnVariant::create(0, false);
     fill_varaint_column(insert_object, 1, 1);
     std::cout << insert_object->debug_string() << std::endl;
     std::unique_ptr<VariantColumnData> _variant_column_data = 
std::make_unique<VariantColumnData>();
diff --git a/be/test/exec/common/schema_util_test.cpp 
b/be/test/exec/common/schema_util_test.cpp
index 1a9b4617c52..db13e17159d 100644
--- a/be/test/exec/common/schema_util_test.cpp
+++ b/be/test/exec/common/schema_util_test.cpp
@@ -707,8 +707,8 @@ TEST_F(SchemaUtilTest, TestParseVariantColumns) {
     Block block;
 
     // Create a variant column with JSON string data
-    auto variant_type = std::make_shared<DataTypeVariant>(10);
-    auto variant_column = ColumnVariant::create(10);
+    auto variant_type = std::make_shared<DataTypeVariant>(10, false);
+    auto variant_column = ColumnVariant::create(10, false);
     auto root_column = ColumnString::create();
     root_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'a': 
1, 'b': 'test'}"));
     variant_column->create_root(std::make_shared<DataTypeString>(), 
root_column->get_ptr());
@@ -773,7 +773,7 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) {
     EXPECT_EQ(result->size(), 1);
 
     // Test casting to variant type
-    auto variant_type = std::make_shared<DataTypeVariant>(10);
+    auto variant_type = std::make_shared<DataTypeVariant>(10, false);
     auto nullable_array_type =
             
make_nullable(std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()));
     auto array_column =
@@ -801,7 +801,7 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) {
     EXPECT_TRUE(result1->is_nullable());
 
     // Test casting from variant to variant
-    auto variant_column = ColumnVariant::create(10);
+    auto variant_column = ColumnVariant::create(10, false);
     variant_column->create_root(nullable_array_type, 
nullable_array_column->assume_mutable());
 
     ColumnWithTypeAndName variant_col;
@@ -1215,8 +1215,8 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
     Block block;
 
     // Test parsing from string to variant
-    auto variant_type = std::make_shared<DataTypeVariant>(10);
-    auto variant_column = ColumnVariant::create(10);
+    auto variant_type = std::make_shared<DataTypeVariant>(10, false);
+    auto variant_column = ColumnVariant::create(10, false);
     auto root_column = ColumnString::create();
 
     // Add some test JSON data
@@ -1239,7 +1239,7 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
     auto jsonb_column = ColumnString::create();
     
jsonb_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'x': 
1}"));
 
-    auto variant_column2 = ColumnVariant::create(10);
+    auto variant_column2 = ColumnVariant::create(10, false);
     variant_column2->create_root(jsonb_type, jsonb_column->get_ptr());
 
     Block block2;
@@ -1249,7 +1249,7 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
     EXPECT_TRUE(status.ok());
 
     // Test parsing already parsed variant
-    auto variant_column3 = ColumnVariant::create(10);
+    auto variant_column3 = ColumnVariant::create(10, false);
     variant_column3->finalize();
 
     Block block3;
@@ -1263,14 +1263,14 @@ TEST_F(SchemaUtilTest, 
TestParseVariantColumnsWithNulls) {
     Block block;
 
     // Create a nullable variant column
-    auto variant_type = make_nullable(std::make_shared<DataTypeVariant>(10));
+    auto variant_type = make_nullable(std::make_shared<DataTypeVariant>(10, 
false));
     auto string_type = make_nullable(std::make_shared<DataTypeString>());
 
     auto string_column = ColumnString::create();
     
string_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'a': 
1}"));
     auto nullable_string = make_nullable(string_column->get_ptr());
 
-    auto variant_column = ColumnVariant::create(10);
+    auto variant_column = ColumnVariant::create(10, false);
     variant_column->create_root(string_type, 
nullable_string->assume_mutable());
     auto nullable_variant = make_nullable(variant_column->get_ptr());
 
@@ -1866,7 +1866,7 @@ TEST_F(SchemaUtilTest, 
parse_and_materialize_variant_columns_ambiguous_paths) {
     dynamic_subcolumns.create_root(
             ColumnVariant::Subcolumn(string_col->assume_mutable(), 
string_type, true));
 
-    auto variant_col = ColumnVariant::create(0, std::move(dynamic_subcolumns));
+    auto variant_col = ColumnVariant::create(0, false, 
std::move(dynamic_subcolumns));
     auto variant_type = std::make_shared<DataTypeVariant>();
 
     // Construct the block
diff --git a/be/test/exprs/function/cast/function_variant_cast_test.cpp 
b/be/test/exprs/function/cast/function_variant_cast_test.cpp
index 2f97e2e85eb..2ee76058bc6 100644
--- a/be/test/exprs/function/cast/function_variant_cast_test.cpp
+++ b/be/test/exprs/function/cast/function_variant_cast_test.cpp
@@ -49,7 +49,7 @@ static doris::Field construct_variant_map(
 
 static auto construct_basic_varint_column() {
     // 1. create an empty variant column
-    auto variant = ColumnVariant::create(5);
+    auto variant = ColumnVariant::create(5, false);
 
     std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -171,7 +171,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
     {
         auto variant_type = std::make_shared<DataTypeVariant>();
         auto int32_type = std::make_shared<DataTypeInt32>();
-        auto variant_col = ColumnVariant::create(0);
+        auto variant_col = ColumnVariant::create(0, false);
 
         // Create a variant column with integer values
         variant_col->create_root(int32_type, ColumnInt32::create());
@@ -210,7 +210,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
     {
         auto variant_type = std::make_shared<DataTypeVariant>();
         auto string_type = std::make_shared<DataTypeString>();
-        auto variant_col = ColumnVariant::create(0);
+        auto variant_col = ColumnVariant::create(0, false);
 
         // Create a variant column with string values
         variant_col->create_root(string_type, ColumnString::create());
@@ -246,7 +246,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
     {
         auto variant_type = std::make_shared<DataTypeVariant>();
         auto array_type = 
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
-        auto variant_col = ColumnVariant::create(0);
+        auto variant_col = ColumnVariant::create(0, false);
 
         // Create a variant column with array values
         variant_col->create_root(
@@ -294,7 +294,7 @@ TEST(FunctionVariantCast, CastVariantWithNull) {
     auto nullable_int32_type = std::make_shared<DataTypeNullable>(int32_type);
 
     // Create a variant column with nullable integer values
-    auto variant_col = ColumnVariant::create(0);
+    auto variant_col = ColumnVariant::create(0, false);
     variant_col->create_root(nullable_int32_type,
                              ColumnNullable::create(ColumnInt32::create(), 
ColumnUInt8::create()));
     MutableColumnPtr data = variant_col->get_root();
@@ -343,7 +343,7 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
         ColumnVariant::Subcolumns dynamic_subcolumns;
         dynamic_subcolumns.add(PathInData(ColumnVariant::COLUMN_NAME_DUMMY),
                                ColumnVariant::Subcolumn {root->get_ptr(), 
int32_type, true, true});
-        auto variant_col = ColumnVariant::create(0, 
std::move(dynamic_subcolumns));
+        auto variant_col = ColumnVariant::create(0, false, 
std::move(dynamic_subcolumns));
 
         variant_col->finalize();
         ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), 
variant_type, "variant_col"},
@@ -481,7 +481,7 @@ TEST(FunctionVariantCast, 
CastFromVariantStrictModeRegression) {
         auto nullable_int32_type = 
std::make_shared<DataTypeNullable>(int32_type);
 
         // Create variant column with nullable integer root (some null, some 
not)
-        auto variant_col = ColumnVariant::create(0);
+        auto variant_col = ColumnVariant::create(0, false);
         variant_col->create_root(
                 nullable_int32_type,
                 ColumnNullable::create(ColumnInt32::create(), 
ColumnUInt8::create()));
@@ -557,7 +557,7 @@ TEST(FunctionVariantCast, 
CastFromVariantStrictModeRegression) {
         auto nullable_string_type =
                 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
 
-        auto variant_col = ColumnVariant::create(0);
+        auto variant_col = ColumnVariant::create(0, false);
         variant_col->create_root(
                 nullable_string_type,
                 ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create()));
diff --git a/be/test/exprs/function/function_variant_element_test.cpp 
b/be/test/exprs/function/function_variant_element_test.cpp
index 76ac61a98ba..d4d413a601a 100644
--- a/be/test/exprs/function/function_variant_element_test.cpp
+++ b/be/test/exprs/function/function_variant_element_test.cpp
@@ -22,7 +22,7 @@
 namespace doris {
 
 TEST(function_variant_element_test, extract_from_sparse_column) {
-    auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/);
+    auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/, 
false);
     auto* variant_ptr = assert_cast<ColumnVariant*>(variant_column.get());
 
     ColumnVariant::Subcolumn subcolumn(0, true, false);
diff --git a/be/test/storage/segment/hierarchical_data_iterator_test.cpp 
b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
index 107cbd0b1ce..614e73dff6a 100644
--- a/be/test/storage/segment/hierarchical_data_iterator_test.cpp
+++ b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
@@ -96,10 +96,10 @@ TEST(HierarchicalDataIteratorTest, 
ProcessSparseExtractSubpaths) {
     offs.push_back(keys.size());
 
     const size_t nrows = 2;
-    MutableColumnPtr dst = ColumnVariant::create(/*max_subcolumns_count*/ 2, 
nrows);
+    MutableColumnPtr dst = ColumnVariant::create(/*max_subcolumns_count*/ 2, 
false, nrows);
 
     auto& variant = assert_cast<ColumnVariant&>(*dst);
-    ASSERT_TRUE(hiter->_process_sparse_column(variant, nrows).ok());
+    ASSERT_TRUE(hiter->_process_binary_column(variant, nrows).ok());
 
     // root column + 2 subcolumns
     EXPECT_EQ(variant.get_subcolumns().size(), 3);
diff --git a/be/test/storage/segment/nested_group_provider_test.cpp 
b/be/test/storage/segment/nested_group_provider_test.cpp
index f59ac52cfaa..7baa6f51fcb 100644
--- a/be/test/storage/segment/nested_group_provider_test.cpp
+++ b/be/test/storage/segment/nested_group_provider_test.cpp
@@ -58,7 +58,7 @@ TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
         GTEST_SKIP() << "EE build: write provider has real implementation";
     }
 
-    auto column_variant = ColumnVariant::create(0);
+    auto column_variant = ColumnVariant::create(0, false);
     ColumnWriterOptions opts;
     VariantStatistics statistics;
 
diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp 
b/be/test/storage/segment/variant_column_writer_reader_test.cpp
index 2fa6642e340..e309aa6f2b1 100644
--- a/be/test/storage/segment/variant_column_writer_reader_test.cpp
+++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp
@@ -230,8 +230,8 @@ protected:
         for (const auto& batch : batches) {
             Block block = _tablet_schema->create_block();
             auto columns = block.mutate_columns();
-            auto variant_col =
-                    
ColumnVariant::create(_tablet_schema->column(0).variant_max_subcolumns_count());
+            auto variant_col = ColumnVariant::create(
+                    _tablet_schema->column(0).variant_max_subcolumns_count(), 
false);
             auto json_col = ColumnString::create();
             for (const auto& json : batch) {
                 json_col->insert_data(json.data(), json.size());
@@ -271,8 +271,8 @@ protected:
 
         Block block = _tablet_schema->create_block();
         auto columns = block.mutate_columns();
-        auto variant_col =
-                
ColumnVariant::create(_tablet_schema->column(0).variant_max_subcolumns_count());
+        auto variant_col = ColumnVariant::create(
+                _tablet_schema->column(0).variant_max_subcolumns_count(), 
false);
         auto json_col = ColumnString::create();
         for (const auto& json : jsons) {
             json_col->insert_data(json.data(), json.size());
@@ -432,7 +432,7 @@ static std::set<std::string> collect_regular_paths(
 
 static std::vector<std::string> normalize_json_rows(const 
std::vector<std::string>& jsons,
                                                     int 
variant_max_subcolumns_count) {
-    auto variant_col = ColumnVariant::create(variant_max_subcolumns_count);
+    auto variant_col = ColumnVariant::create(variant_max_subcolumns_count, 
false);
     auto json_col = ColumnString::create();
     for (const auto& json : jsons) {
         json_col->insert_data(json.data(), json.size());
@@ -685,7 +685,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_normal) {
     }
 
     auto read_to_column_object = [&](ColumnIteratorUPtr& it) {
-        new_column_object = ColumnVariant::create(3);
+        new_column_object = ColumnVariant::create(3, false);
         nrows = 1000;
         st = it->seek_to_ordinal(0);
         EXPECT_TRUE(st.ok()) << st.msg();
@@ -872,7 +872,8 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_normal) {
     // test VariantRootColumnIterator for next_batch and read_by_rowids
     {
         auto iter = assert_cast<VariantRootColumnIterator*>(it3.get());
-        auto nullable_dt = 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(3));
+        auto nullable_dt =
+                
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(3, false));
         MutableColumnPtr root_column_object = nullable_dt->create_column();
         nrows = 1000;
         st = iter->seek_to_ordinal(0);
@@ -981,19 +982,19 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_normal) {
         for (int i = 0; i < 1000; ++i) {
             row_ids1.push_back(i);
         }
-        MutableColumnPtr sparse_dst1 = ColumnVariant::create(3);
+        MutableColumnPtr sparse_dst1 = ColumnVariant::create(3, false);
         st = iter->read_by_rowids(row_ids1.data(), row_ids1.size(), 
sparse_dst1);
         EXPECT_TRUE(st.ok()) << st.msg();
         EXPECT_TRUE(sparse_dst1->size() == row_ids1.size());
         // test to nullable column object
         std::cout << "test 2 " << std::endl;
         MutableColumnPtr sparse_dst2 =
-                ColumnNullable::create(ColumnVariant::create(3), 
ColumnUInt8::create());
+                ColumnNullable::create(ColumnVariant::create(3, false), 
ColumnUInt8::create());
         st = iter->read_by_rowids(row_ids1.data(), row_ids1.size(), 
sparse_dst2);
         EXPECT_TRUE(st.ok()) << st.msg();
         EXPECT_TRUE(sparse_dst2->size() == row_ids1.size());
         std::cout << "test 3" << std::endl;
-        MutableColumnPtr sparse_dst3 = ColumnVariant::create(3);
+        MutableColumnPtr sparse_dst3 = ColumnVariant::create(3, false);
         size_t rs = 1000;
         bool has_null = false;
         st = iter->seek_to_ordinal(0);
@@ -1528,11 +1529,11 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_doc_compact_writer_and_read_doc
     config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
 
     MutableColumnPtr root_variant =
-            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
     variant_util::parse_json_to_variant(*root_variant, *full_strings, config);
 
     MutableColumnPtr bucket_variant =
-            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
     variant_util::parse_json_to_variant(*bucket_variant, *bucket_strings, 
config);
 
     // 6. append and write
@@ -1703,7 +1704,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_doc_compact_sparse_write_array_gap) {
     parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
 
     MutableColumnPtr bucket_variant =
-            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
     variant_util::parse_json_to_variant(*bucket_variant, *strings, parse_cfg);
 
     auto bucket_data = std::make_unique<VariantColumnData>();
@@ -1804,7 +1805,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_doc_sparse_write_array_gap_and_
     parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
 
     MutableColumnPtr variant_column =
-            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+            
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
     variant_util::parse_json_to_variant(*variant_column, *strings, parse_cfg);
 
     auto variant_data = std::make_unique<VariantColumnData>();
@@ -2012,7 +2013,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_advanced) {
     st = it->init(column_iter_opts);
     EXPECT_TRUE(st.ok()) << st.msg();
 
-    MutableColumnPtr new_column_object = ColumnVariant::create(3);
+    MutableColumnPtr new_column_object = ColumnVariant::create(3, false);
     size_t nrows = 1000;
     st = it->seek_to_ordinal(0);
     EXPECT_TRUE(st.ok()) << st.msg();
@@ -2031,7 +2032,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_advanced) {
     }
 
     auto read_to_column_object = [&](ColumnIteratorUPtr& it) {
-        new_column_object = ColumnVariant::create(10);
+        new_column_object = ColumnVariant::create(10, false);
         nrows = 1000;
         st = it->seek_to_ordinal(0);
         EXPECT_TRUE(st.ok()) << st.msg();
@@ -3301,7 +3302,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
     st = nested_iter->init(column_iter_opts);
     EXPECT_TRUE(st.ok()) << st.msg();
     // fill with nullable ColumnVariant target
-    MutableColumnPtr new_column_object1 = ColumnVariant::create(3);
+    MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false);
     MutableColumnPtr null_object =
             ColumnNullable::create(new_column_object1->assume_mutable(), 
ColumnUInt8::create());
     size_t n = 1000;
@@ -3313,7 +3314,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
     EXPECT_TRUE(stats.bytes_read > 0);
     {
         // fill with nullable ColumnVariant target
-        MutableColumnPtr new_column_object12 = ColumnVariant::create(3);
+        MutableColumnPtr new_column_object12 = ColumnVariant::create(3, false);
         MutableColumnPtr null_object12 = ColumnNullable::create(
                 new_column_object12->assume_mutable(), ColumnUInt8::create());
         st = nested_iter->seek_to_ordinal(0);
@@ -3345,7 +3346,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
         st = nested_iter2->init(column_iter_opts);
         EXPECT_TRUE(st.ok()) << st.msg();
         // fill with nullable ColumnVariant target
-        MutableColumnPtr new_column_object2 = ColumnVariant::create(3);
+        MutableColumnPtr new_column_object2 = ColumnVariant::create(3, false);
         MutableColumnPtr null_object2 =
                 ColumnNullable::create(new_column_object2->assume_mutable(), 
ColumnUInt8::create());
         size_t nrows = 1000;
@@ -3393,7 +3394,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
         nested_subcolumns[parent_path].emplace_back(second);
         // test _process_with_nested_column with different type
         // init container which is ColumnVariant
-        MutableColumnPtr nested_column_object = ColumnVariant::create(3);
+        MutableColumnPtr nested_column_object = ColumnVariant::create(3, 
false);
         auto& container_variant = 
assert_cast<ColumnVariant&>(*nested_column_object);
         st = nested_iter2->_process_nested_columns(container_variant, 
nested_subcolumns, n);
         std::cout << st.msg() << std::endl;
@@ -3455,7 +3456,7 @@ TEST_F(VariantColumnWriterReaderTest, 
test_nested_iter_nullable) {
     st = nested_iter->init(column_iter_opts);
     EXPECT_TRUE(st.ok()) << st.msg();
     // fill with nullable ColumnVariant target
-    MutableColumnPtr new_column_object1 = ColumnVariant::create(3);
+    MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false);
     MutableColumnPtr null_object =
             ColumnNullable::create(new_column_object1->assume_mutable(), 
ColumnUInt8::create());
     size_t nrows = 1000;
diff --git a/be/test/storage/segment/variant_util_test.cpp 
b/be/test/storage/segment/variant_util_test.cpp
index 8e9a81149cd..8980021bf9d 100644
--- a/be/test/storage/segment/variant_util_test.cpp
+++ b/be/test/storage/segment/variant_util_test.cpp
@@ -49,7 +49,7 @@ TEST(VariantUtilTest, 
ParseDocValueToSubcolumns_FillsDefaultsAndValues) {
             R"({"a":3})",         //
     };
 
-    auto variant = ColumnVariant::create(0);
+    auto variant = ColumnVariant::create(0, true);
     auto json_col = _make_json_column(jsons);
 
     ParseConfig cfg;
@@ -101,7 +101,7 @@ TEST(VariantUtilTest, 
ParseOnlyDocValueColumn_SerializesMixedTypes) {
             R"({"b":false,"arr":[4],"s":"y"})",
     };
 
-    auto variant = ColumnVariant::create(0);
+    auto variant = ColumnVariant::create(0, true);
     auto json_col = _make_json_column(jsons);
 
     ParseConfig cfg;
@@ -185,14 +185,14 @@ TEST(VariantUtilTest, 
ParseVariantColumns_ScalarJsonStringToSubcolumns) {
     TabletSchema tablet_schema;
     tablet_schema.init_from_pb(schema_pb);
 
-    auto variant = ColumnVariant::create(0);
+    auto variant = ColumnVariant::create(0, false);
     doris::VariantUtil::insert_root_scalar_field(
             *variant, Field::create_field<TYPE_STRING>(String(R"({"a":1})")));
     doris::VariantUtil::insert_root_scalar_field(
             *variant, Field::create_field<TYPE_STRING>(String(R"({"a":2})")));
 
     Block block;
-    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0), 
"v"});
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, 
false), "v"});
 
     const std::vector<uint32_t> column_pos {0};
     Status st = parse_and_materialize_variant_columns(block, tablet_schema, 
column_pos);
@@ -219,7 +219,7 @@ TEST(VariantUtilTest, 
ParseVariantColumns_DocModeBinaryToSubcolumns) {
     };
 
     // Build a doc-mode ColumnVariant: Only root in subcolumns, others stored 
in doc snapshot column.
-    auto variant = ColumnVariant::create(0);
+    auto variant = ColumnVariant::create(0, true);
     auto json_col = _make_json_column(jsons);
     ParseConfig cfg;
     cfg.deprecated_enable_flatten_nested = false;
@@ -228,7 +228,7 @@ TEST(VariantUtilTest, 
ParseVariantColumns_DocModeBinaryToSubcolumns) {
     ASSERT_TRUE(variant->is_doc_mode());
 
     Block block;
-    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0), 
"v"});
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, 
true), "v"});
 
     ParseConfig parse_cfg;
     parse_cfg.deprecated_enable_flatten_nested = false;
@@ -271,7 +271,7 @@ TEST(VariantUtilTest, 
ParseVariantColumns_DocModeBinaryToSubcolumns) {
 
 TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
     const std::vector<std::string_view> jsons = {R"({"a":1})"};
-    auto variant = ColumnVariant::create(0);
+    auto variant = ColumnVariant::create(0, true);
     auto json_col = _make_json_column(jsons);
 
     ParseConfig cfg;
@@ -281,7 +281,7 @@ TEST(VariantUtilTest, 
ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
     ASSERT_TRUE(variant->is_doc_mode());
 
     Block block;
-    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0), 
"v"});
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, 
true), "v"});
 
     ParseConfig parse_cfg;
     parse_cfg.deprecated_enable_flatten_nested = false;
diff --git a/be/test/testutil/variant_util.h b/be/test/testutil/variant_util.h
index 9d3cdea9e84..17959afcbd6 100644
--- a/be/test/testutil/variant_util.h
+++ b/be/test/testutil/variant_util.h
@@ -70,7 +70,7 @@ public:
 
     static auto construct_basic_varint_column() {
         // 1. create an empty variant column
-        auto variant = ColumnVariant::create(5);
+        auto variant = ColumnVariant::create(5, false);
 
         std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -105,12 +105,12 @@ public:
         dynamic_subcolumns.add(PathInData("v.b"), ColumnVariant::Subcolumn {0, 
true});
         dynamic_subcolumns.add(PathInData("v.b.d"), ColumnVariant::Subcolumn 
{0, true});
         dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn 
{0, true});
-        return ColumnVariant::create(5, std::move(dynamic_subcolumns));
+        return ColumnVariant::create(5, false, std::move(dynamic_subcolumns));
     }
 
     static auto construct_advanced_varint_column() {
         // 1. create an empty variant column
-        auto variant = ColumnVariant::create(5);
+        auto variant = ColumnVariant::create(5, false);
 
         std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -153,7 +153,7 @@ public:
 
     static auto construct_varint_column_only_subcolumns() {
         // 1. create an empty variant column
-        auto variant = ColumnVariant::create(5);
+        auto variant = ColumnVariant::create(5, false);
 
         std::vector<std::pair<std::string, doris::Field>> data;
 
@@ -196,7 +196,7 @@ public:
 
     static auto construct_varint_column_more_subcolumns() {
         // 1. create an empty variant column
-        auto variant = ColumnVariant::create(5);
+        auto variant = ColumnVariant::create(5, false);
 
         std::vector<std::pair<std::string, doris::Field>> data;
 
diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
index 69917dbf4b7..df24768c180 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
@@ -200,6 +200,8 @@ public class VariantType extends ScalarType {
         // set the count
         container.getTypes().get(container.getTypes().size() - 1)
                 
.scalar_type.setVariantMaxSubcolumnsCount(variantMaxSubcolumnsCount);
+        container.getTypes().get(container.getTypes().size() - 1)
+                .scalar_type.setVariantEnableDocMode(enableVariantDocMode);
     }
 
     @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 3ea2b7d6879..8a07c55fd3c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -621,6 +621,7 @@ public class Column implements GsonPostProcessable {
         tColumnType.setPrecision(this.getPrecision());
         tColumnType.setScale(this.getScale());
         
tColumnType.setVariantMaxSubcolumnsCount(this.getVariantMaxSubcolumnsCount());
+        tColumnType.setVariantEnableDocMode(this.getVariantEnableDocMode());
 
         tColumnType.setIndexLen(this.getOlapColumnIndexSize());
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java
index 54a77e3e396..d83800ff17a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ElementAt.java
@@ -93,10 +93,9 @@ public class ElementAt extends ScalarFunction
         DataType expressionType = arguments.get(0).getDataType();
         DataType sigType = signature.argumentsTypes.get(0);
         if (expressionType instanceof VariantType && sigType instanceof 
VariantType) {
-            // only keep the variant max subcolumns count
-            VariantType variantType = new VariantType(((VariantType) 
expressionType).getVariantMaxSubcolumnsCount());
-            signature = signature.withArgumentType(0, variantType);
-            signature = signature.withReturnType(variantType);
+            // propagate all variant type properties (doc mode, max 
subcolumns, etc.)
+            signature = signature.withArgumentType(0, (VariantType) 
expressionType);
+            signature = signature.withReturnType((VariantType) expressionType);
         }
         return super.computeSignature(signature);
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index a3b75f40903..3810e30b195 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -396,13 +396,15 @@ public abstract class ExternalFileTableValuedFunction 
extends TableValuedFunctio
         } else if (tPrimitiveType == TPrimitiveType.VARIANT) {
             // Preserve VARIANT-specific properties from PTypeNode, especially 
variant_max_subcolumns_count.
             int maxSubcolumns = typeNode.getVariantMaxSubcolumnsCount();
+            boolean enableDocMode = typeNode.hasVariantEnableDocMode()
+                    ? typeNode.getVariantEnableDocMode() : false;
             // Currently no predefined fields are carried in PTypeNode for 
VARIANT, so use empty list and default
             // values for other properties.
             type = new VariantType(new ArrayList<>(), maxSubcolumns,
                     /*enableTypedPathsToSparse*/ false,
                     /*variantMaxSparseColumnStatisticsSize*/ 10000,
                     /*variantSparseHashShardCount*/ 0,
-                    /*variantEnableDocMode*/ false,
+                    /*variantEnableDocMode*/ enableDocMode,
                     /*variantDocMaterializationMinRows*/ 0,
                     /*variantDocShardCount*/ 0,
                     /*enableNestedGroup*/ false);
diff --git a/gensrc/proto/data.proto b/gensrc/proto/data.proto
index bda5daa03bf..9cf8e37d6d4 100644
--- a/gensrc/proto/data.proto
+++ b/gensrc/proto/data.proto
@@ -68,6 +68,7 @@ message PColumnMeta {
     optional int32 be_exec_version = 8;
     optional segment_v2.ColumnPathInfo column_path = 9;
     optional int32 variant_max_subcolumns_count = 10 [default = 0];
+    optional bool variant_enable_doc_mode = 11 [default = false];
 }
 
 message PBlock {
diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto
index 74c5479fd0f..3c8e646acd9 100644
--- a/gensrc/proto/segment_v2.proto
+++ b/gensrc/proto/segment_v2.proto
@@ -230,6 +230,7 @@ message ColumnMetaPB {
     optional uint64 compressed_data_bytes = 24;
     optional uint64 uncompressed_data_bytes = 25;
     optional uint64 raw_data_bytes = 26;
+    optional bool variant_enable_doc_mode = 27 [default = false];
 }
 
 // External column meta entry describing one top-level column's externalized
diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto
index 49bff7c4049..d0947b7a2af 100644
--- a/gensrc/proto/types.proto
+++ b/gensrc/proto/types.proto
@@ -56,6 +56,7 @@ message PTypeNode {
 
     // only used for VARIANT
     optional int32 variant_max_subcolumns_count = 6 [default = 0];
+    optional bool variant_enable_doc_mode = 7 [default = false];
 };
 
 // A flattened representation of a tree of column types obtained by depth-first
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index a60a63ceb96..5641c3b3125 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -96,7 +96,7 @@ struct TColumn {
     23: optional bool is_on_update_current_timestamp = false
     24: optional i32 variant_max_sparse_column_statistics_size = 10000
     25: optional i32 variant_sparse_hash_shard_count
-    26: optional bool variant_enable_doc_mode
+    26: optional bool variant_enable_doc_mode // deprecated, use 
TColumnType.variant_enable_doc_mode
   27: optional i64 variant_doc_materialization_min_rows
   28: optional i32 variant_doc_hash_shard_count
   29: optional bool variant_enable_nested_group
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index e2abaaace9c..fe09635b5e2 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -145,6 +145,7 @@ struct TScalarType {
 
     // Only set for VARIANT
     5: optional i32 variant_max_subcolumns_count = 0;
+    6: optional bool variant_enable_doc_mode = false;
 }
 
 // Represents a field in a STRUCT type.
@@ -283,6 +284,7 @@ struct TColumnType {
   4: optional i32 precision
   5: optional i32 scale
   6: optional i32 variant_max_subcolumns_count = 0;
+  7: optional bool variant_enable_doc_mode = false;
 }
 
 // A TNetworkAddress is the standard host, port representation of a


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to