github-actions[bot] commented on code in PR #45492:
URL: https://github.com/apache/doris/pull/45492#discussion_r1886886120


##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -220,6 +222,146 @@ Status ColumnReader::create_agg_state(const 
ColumnReaderOptions& opts, const Col
                                  agg_state_type->get_name(), int(type));
 }
 
+const SubcolumnColumnReaders::Node* VariantColumnReader::get_reader_by_path(
+        const vectorized::PathInData& relative_path) const {
+    return _subcolumn_readers->find_leaf(relative_path);
+}
+
+Status VariantColumnReader::new_iterator(ColumnIterator** iterator,

Review Comment:
   warning: function 'new_iterator' has cognitive complexity of 51 (threshold 
50) [readability-function-cognitive-complexity]
   ```cpp
   Status VariantColumnReader::new_iterator(ColumnIterator** iterator,
                               ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/column_reader.cpp:237:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (node != nullptr) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:238:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (node->is_leaf_node()) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:242:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(node->data.reader->new_iterator(iterator));
               ^
   ```
   **be/src/common/status.h:632:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:242:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(node->data.reader->new_iterator(iterator));
               ^
   ```
   **be/src/common/status.h:634:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:243:** +1, nesting level 
increased to 2
   ```cpp
           } else {
             ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:247:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (!_sparse_column_set_in_stats.empty()) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:250:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&iter));
                   ^
   ```
   **be/src/common/status.h:632:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:250:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&iter));
                   ^
   ```
   **be/src/common/status.h:634:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:255:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
                       (relative_path == root->path) ? 
HierarchicalDataReader::ReadType::MERGE_ROOT
                                                     ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:257:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(HierarchicalDataReader::create(iterator, 
relative_path, node, root,
               ^
   ```
   **be/src/common/status.h:632:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:257:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(HierarchicalDataReader::create(iterator, 
relative_path, node, root,
               ^
   ```
   **be/src/common/status.h:634:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:260:** +1, nesting level 
increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:261:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (_sparse_column_set_in_stats.contains(StringRef 
{relative_path.get_path()}) ||
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:266:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               
RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&inner_iter));
               ^
   ```
   **be/src/common/status.h:632:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:266:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               
RETURN_IF_ERROR(_sparse_column_reader->new_iterator(&inner_iter));
               ^
   ```
   **be/src/common/status.h:634:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:269:** +1, nesting level 
increased to 2
   ```cpp
           } else {
             ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:272:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               RETURN_IF_ERROR(Segment::new_default_iterator(target_col, 
&iter));
               ^
   ```
   **be/src/common/status.h:632:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/column_reader.cpp:272:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
               RETURN_IF_ERROR(Segment::new_default_iterator(target_col, 
&iter));
               ^
   ```
   **be/src/common/status.h:634:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   
   </details>
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1717,89 +1776,203 @@ void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::Alloca
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    rapidjson::StringBuffer buf;
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         *output = type->to_string(*get_root(), row);
         return Status::OK();
     }
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    // TODO avoid copy
-    *output = std::string(buf.GetString(), buf.GetSize());
+    // TODO preallocate memory
+    auto tmp_col = ColumnString::create();
+    VectorBufferWriter write_buffer(*tmp_col.get());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    write_buffer.commit();
+    auto str_ref = tmp_col->get_data_at(0);
+    *output = std::string(str_ref.data, str_ref.size);
     return Status::OK();
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         type->to_string(*get_root(), row, output);
         return Status::OK();
     }
-    rapidjson::StringBuffer buf;
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    output.write(buf.GetString(), buf.GetLength());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
     return Status::OK();
 }
 
-Status ColumnObject::serialize_one_row_to_json_format(int64_t row, 
rapidjson::StringBuffer* output,
-                                                      bool* is_null) const {
-    CHECK(is_finalized());
-    if (subcolumns.empty()) {
-        if (is_null != nullptr) {
-            *is_null = true;
-        } else {
-            rapidjson::Value root(rapidjson::kNullType);
-            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-            if (!root.Accept(writer)) {
-                return Status::InternalError("Failed to serialize json value");
+/// Struct that represents elements of the JSON path.
+/// "a.b.c" -> ["a", "b", "c"]
+struct PathElements {
+    explicit PathElements(const String& path) {
+        const char* start = path.data();
+        const char* end = start + path.size();
+        const char* pos = start;
+        const char* last_dot_pos = pos - 1;
+        for (pos = start; pos != end; ++pos) {
+            if (*pos == '.') {
+                elements.emplace_back(last_dot_pos + 1, size_t(pos - 
last_dot_pos - 1));
+                last_dot_pos = pos;
             }
         }
-        return Status::OK();
+
+        elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 
1));
     }
-    CHECK(size() > row);
-    rapidjson::StringBuffer buffer;
-    rapidjson::Value root(rapidjson::kNullType);
-    if (doc_structure == nullptr) {
-        doc_structure = std::make_shared<rapidjson::Document>();
-        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
-        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+
+    size_t size() const { return elements.size(); }
+
+    std::vector<std::string_view> elements;
+};
+
+/// Struct that represents a prefix of a JSON path. Used during output of the 
JSON object.
+struct Prefix {
+    /// Shrink current prefix to the common prefix of current prefix and 
specified path.
+    /// For example, if current prefix is a.b.c.d and path is a.b.e, then 
shrink the prefix to a.b.
+    void shrink_to_common_prefix(const PathElements& path_elements) {
+        /// Don't include last element in path_elements in the prefix.
+        size_t i = 0;
+        while (i != elements.size() && i != (path_elements.elements.size() - 
1) &&
+               elements[i].first == path_elements.elements[i])
+            ++i;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
                  elements[i].first == path_elements.elements[i]) {
               ++i;
   }
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -150,95 +156,295 @@ ordinal_t HierarchicalDataReader::get_current_ordinal() 
const {
     return (*_substream_reader.begin())->data.iterator->get_current_ordinal();
 }
 
-Status ExtractReader::init(const ColumnIteratorOptions& opts) {
-    if (!_root_reader->inited) {
-        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
-        _root_reader->inited = true;
+Status HierarchicalDataReader::_process_sub_columns(
+        vectorized::ColumnObject& container_variant,
+        const vectorized::PathsWithColumnAndType& non_nested_subcolumns) {
+    for (const auto& entry : non_nested_subcolumns) {
+        DCHECK(!entry.path.has_nested_part());
+        bool add = container_variant.add_sub_column(entry.path, 
entry.column->assume_mutable(),
+                                                    entry.type);
+        if (!add) {
+            return Status::InternalError("Duplicated {}, type {}", 
entry.path.get_path(),
+                                         entry.type->get_name());
+        }
     }
     return Status::OK();
 }
 
-Status ExtractReader::seek_to_first() {
-    LOG(FATAL) << "Not implemented";
-    __builtin_unreachable();
+Status HierarchicalDataReader::_process_nested_columns(
+        vectorized::ColumnObject& container_variant,
+        const std::map<vectorized::PathInData, 
vectorized::PathsWithColumnAndType>&
+                nested_subcolumns) {
+    using namespace vectorized;
+    // Iterate nested subcolumns and flatten them, the entry contains the 
nested subcolumns of the same nested parent
+    // first we pick the first subcolumn as base array and using it's offset 
info. Then we flatten all nested subcolumns
+    // into a new object column and wrap it with array column using the first 
element offsets.The wrapped array column
+    // will type the type of ColumnObject::NESTED_TYPE, whih is 
Nullable<ColumnArray<NULLABLE(ColumnObject)>>.
+    for (const auto& entry : nested_subcolumns) {
+        MutableColumnPtr nested_object = ColumnObject::create(true, false);
+        const auto* base_array =
+                
check_and_get_column<ColumnArray>(remove_nullable(entry.second[0].column));
+        MutableColumnPtr offset = 
base_array->get_offsets_ptr()->assume_mutable();
+        auto* nested_object_ptr = 
assert_cast<ColumnObject*>(nested_object.get());
+        // flatten nested arrays
+        for (const auto& subcolumn : entry.second) {
+            const auto& column = subcolumn.column;
+            const auto& type = subcolumn.type;
+            if (!remove_nullable(column)->is_column_array()) {
+                return Status::InvalidArgument(
+                        "Meet none array column when flatten nested array, 
path {}, type {}",
+                        subcolumn.path.get_path(), subcolumn.type->get_name());
+            }
+            const auto* target_array =
+                    
check_and_get_column<ColumnArray>(remove_nullable(subcolumn.column).get());
+#ifndef NDEBUG
+            if (!base_array->has_equal_offsets(*target_array)) {
+                return Status::InvalidArgument(
+                        "Meet none equal offsets array when flatten nested 
array, path {}, "
+                        "type {}",
+                        subcolumn.path.get_path(), subcolumn.type->get_name());
+            }
+#endif
+            MutableColumnPtr flattend_column = 
target_array->get_data_ptr()->assume_mutable();
+            DataTypePtr flattend_type =
+                    
check_and_get_data_type<DataTypeArray>(remove_nullable(type).get())
+                            ->get_nested_type();
+            // add sub path without parent prefix
+            nested_object_ptr->add_sub_column(
+                    
subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()),
+                    std::move(flattend_column), std::move(flattend_type));
+        }
+        nested_object = 
make_nullable(nested_object->get_ptr())->assume_mutable();
+        auto array =
+                make_nullable(ColumnArray::create(std::move(nested_object), 
std::move(offset)));
+        PathInDataBuilder builder;
+        // add parent prefix
+        builder.append(entry.first.get_parts(), false);
+        PathInData parent_path = builder.build();
+        // unset nested parts
+        parent_path.unset_nested();
+        DCHECK(!parent_path.has_nested_part());
+        container_variant.add_sub_column(parent_path, array->assume_mutable(),
+                                         ColumnObject::NESTED_TYPE);
+    }
+    return Status::OK();
 }
 
-Status ExtractReader::seek_to_ordinal(ordinal_t ord) {
-    CHECK(_root_reader->inited);
-    return _root_reader->iterator->seek_to_ordinal(ord);
-}
+Status HierarchicalDataReader::_init_container(vectorized::MutableColumnPtr& 
container,
+                                               size_t nrows) {
+    using namespace vectorized;
+    // build variant as container
+    container = ColumnObject::create(true, false);
+    auto& container_variant = assert_cast<ColumnObject&>(*container);
 
-Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t 
nrows) {
-    DCHECK(_root_reader);
-    DCHECK(_root_reader->inited);
-    vectorized::ColumnNullable* nullable_column = nullptr;
-    if (dst->is_nullable()) {
-        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    // add root first
+    if (_path.get_parts().empty() && _root_reader) {
+        auto& root_var =
+                _root_reader->column->is_nullable()
+                        ? assert_cast<vectorized::ColumnObject&>(
+                                  
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                                          .get_nested_column())
+                        : 
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+        auto column = root_var.get_root();
+        auto type = root_var.get_root_type();
+        container_variant.add_sub_column({}, std::move(column), type);
     }
-    auto& variant =
-            nullable_column == nullptr
-                    ? assert_cast<vectorized::ColumnObject&>(*dst)
-                    : 
assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column());
-    const auto& root =
-            _root_reader->column->is_nullable()
-                    ? assert_cast<vectorized::ColumnObject&>(
-                              
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
-                                      .get_nested_column())
-                    : assert_cast<const 
vectorized::ColumnObject&>(*_root_reader->column);
-    // extract root value with path, we can't modify the original root column
-    // since some other column may depend on it.
-    vectorized::MutableColumnPtr extracted_column;
-    RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
-            _col.path_info_ptr()->copy_pop_front(), extracted_column));
-
-    if (_target_type_hint != nullptr) {
-        variant.create_root(_target_type_hint, 
_target_type_hint->create_column());
+    // parent path -> subcolumns
+    std::map<PathInData, PathsWithColumnAndType> nested_subcolumns;
+    PathsWithColumnAndType non_nested_subcolumns;
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        MutableColumnPtr column = node.data.column->get_ptr();
+        PathInData relative_path = 
node.path.copy_pop_nfront(_path.get_parts().size());
+
+        if (node.path.has_nested_part()) {
+            
CHECK_EQ(getTypeName(remove_nullable(node.data.type)->get_type_id()),
+                     getTypeName(TypeIndex::Array));
+            PathInData parent_path =
+                    
node.path.get_nested_prefix_path().copy_pop_nfront(_path.get_parts().size());
+            nested_subcolumns[parent_path].emplace_back(relative_path, 
column->get_ptr(),
+                                                        node.data.type);
+        } else {
+            non_nested_subcolumns.emplace_back(relative_path, 
column->get_ptr(), node.data.type);
+        }
+        return Status::OK();
+    }));
+
+    RETURN_IF_ERROR(_process_sub_columns(container_variant, 
non_nested_subcolumns));
+
+    RETURN_IF_ERROR(_process_nested_columns(container_variant, 
nested_subcolumns));
+
+    RETURN_IF_ERROR(_process_sparse_column(container_variant, nrows));
+    return Status::OK();
+}
+
+// Return sub-path by specified prefix.
+// For example, for prefix a.b:
+// a.b.c.d -> c.d, a.b.c -> c
+static std::string_view get_sub_path(const std::string_view& path, const 
std::string_view& prefix) {
+    return path.substr(prefix.size() + 1);
+}
+
+Status 
HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject& 
container_variant,
+                                                      size_t nrows) {
+    using namespace vectorized;
+    if (!_sparse_column_reader) {
+        
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+        return Status::OK();
     }
-    if (variant.empty() || variant.is_null_root()) {
-        variant.create_root(root.get_root_type(), std::move(extracted_column));
+    // process sparse column
+    if (_path.get_parts().empty()) {
+        // directly use sparse column if access root
+        
container_variant.set_sparse_column(_sparse_column_reader->column->get_ptr());
     } else {
-        vectorized::ColumnPtr cast_column;
-        const auto& expected_type = variant.get_root_type();
-        RETURN_IF_ERROR(vectorized::schema_util::cast_column(
-                {extracted_column->get_ptr(),
-                 vectorized::make_nullable(
-                         
std::make_shared<vectorized::ColumnObject::MostCommonType>()),
-                 ""},
-                expected_type, &cast_column));
-        variant.get_root()->insert_range_from(*cast_column, 0, nrows);
-        // variant.set_num_rows(variant.get_root()->size());
+        const auto& offsets =
+                assert_cast<const 
ColumnMap&>(*_sparse_column_reader->column).get_offsets();
+        /// Check if there is no data in shared data in current range.
+        if (offsets.back() == offsets[-1]) {
+            
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+        } else {
+            // Read for variant sparse column
+            // Example path: a.b
+            // data: a.b.c : int|123
+            //       a.b.d : string|"456"
+            //       a.e.d : string|"789"
+            // then the extracted sparse column will be:
+            // c : int|123
+            // d : string|"456"
+            const auto& sparse_data_map =
+                    assert_cast<const 
ColumnMap&>(*_sparse_column_reader->column);
+            const auto& src_sparse_data_offsets = 
sparse_data_map.get_offsets();
+            const auto& src_sparse_data_paths =
+                    assert_cast<const 
ColumnString&>(sparse_data_map.get_keys());
+            const auto& src_sparse_data_values =
+                    assert_cast<const 
ColumnString&>(sparse_data_map.get_values());
+
+            auto& sparse_data_offsets =
+                    assert_cast<ColumnMap&>(
+                            
*container_variant.get_sparse_column()->assume_mutable())
+                            .get_offsets();
+            auto [sparse_data_paths, sparse_data_values] =
+                    container_variant.get_sparse_data_paths_and_values();
+            StringRef prefix_ref(_path.get_path());
+            std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
+            for (size_t i = 0; i != src_sparse_data_offsets.size(); ++i) {
+                size_t start = src_sparse_data_offsets[ssize_t(i) - 1];
+                size_t end = src_sparse_data_offsets[ssize_t(i)];
+                size_t lower_bound_index =
+                        
vectorized::ColumnObject::find_path_lower_bound_in_sparse_data(
+                                prefix_ref, src_sparse_data_paths, start, end);
+                for (; lower_bound_index != end; ++lower_bound_index) {
+                    auto path_ref = 
src_sparse_data_paths.get_data_at(lower_bound_index);
+                    std::string_view path(path_ref.data, path_ref.size);
+                    if (!path.starts_with(path_prefix)) {
+                        break;
+                    }
+                    // Don't include path that is equal to the prefix.
+                    if (path.size() != path_prefix.size()) {
+                        auto sub_path = get_sub_path(path, path_prefix);
+                        sparse_data_paths->insert_data(sub_path.data(), 
sub_path.size());
+                        
sparse_data_values->insert_from(src_sparse_data_values, lower_bound_index);
+                    }
+                }
+                sparse_data_offsets.push_back(sparse_data_paths->size());
+            }
+        }
     }
-    if (dst->is_nullable()) {
-        // fill nullmap
-        vectorized::ColumnUInt8& dst_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
-        vectorized::ColumnUInt8& src_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*variant.get_root()).get_null_map_column();
-        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::_init_null_map_and_clear_columns(
+        vectorized::MutableColumnPtr& container, vectorized::MutableColumnPtr& 
dst, size_t nrows) {
+    using namespace vectorized;
+    // clear data in nodes
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        node.data.column->clear();
+        return Status::OK();
+    }));
+    container->clear();
+    _sparse_column_reader->column->clear();
+    if (_root_reader) {
+        if (_root_reader->column->is_nullable()) {
+            // fill nullmap
+            DCHECK(dst->is_nullable());
+            ColumnUInt8& dst_null_map = 
assert_cast<ColumnNullable&>(*dst).get_null_map_column();
+            ColumnUInt8& src_null_map =
+                    
assert_cast<ColumnNullable&>(*_root_reader->column).get_null_map_column();
+            dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
+            // clear nullmap and inner data
+            src_null_map.clear();
+            assert_cast<ColumnObject&>(
+                    
assert_cast<ColumnNullable&>(*_root_reader->column).get_nested_column())
+                    .clear_column_data();
+        } else {
+            auto& root_column = 
assert_cast<ColumnObject&>(*_root_reader->column);
+            root_column.clear_column_data();
+        }
+    } else {
+        if (dst->is_nullable()) {
+            // No nullable info exist in hirearchical data, fill nullmap with 
all none null
+            ColumnUInt8& dst_null_map = 
assert_cast<ColumnNullable&>(*dst).get_null_map_column();
+            auto fake_nullable_column = ColumnUInt8::create(nrows, 0);
+            dst_null_map.insert_range_from(*fake_nullable_column, 0, nrows);
+        }
     }
-    _root_reader->column->clear();
-#ifndef NDEBUG
-    variant.check_consistency();
-#endif
     return Status::OK();
 }
 
-Status ExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, 
bool* has_null) {
-    RETURN_IF_ERROR(_root_reader->iterator->next_batch(n, 
_root_reader->column));
-    RETURN_IF_ERROR(extract_to(dst, *n));
+Status SparseColumnExtractReader::init(const ColumnIteratorOptions& opts) {
+    return _sparse_column_reader->init(opts);
+}
+
+Status SparseColumnExtractReader::seek_to_first() {
+    return _sparse_column_reader->seek_to_first();
+}
+
+Status SparseColumnExtractReader::seek_to_ordinal(ordinal_t ord) {
+    return _sparse_column_reader->seek_to_ordinal(ord);
+}
+
+void 
SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr& dst) 
{
+    vectorized::ColumnObject& var =
+            dst->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+                    : assert_cast<vectorized::ColumnObject&>(*dst);
+    DCHECK(!var.is_null_root());
+    vectorized::ColumnObject::fill_path_olumn_from_sparse_data(
+            *var.get_subcolumn({}) /*root*/, StringRef {_path.data(), 
_path.size()},
+            _sparse_column->get_ptr(), 0, _sparse_column->size());
+    _sparse_column->clear();
+}
+
+Status SparseColumnExtractReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,

Review Comment:
   warning: pointer parameter 'n' can be pointer to const 
[readability-non-const-parameter]
   
   ```suggestion
   Status SparseColumnExtractReader::next_batch(const size_t* n, 
vectorized::MutableColumnPtr& dst,
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1717,89 +1776,203 @@
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    rapidjson::StringBuffer buf;
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         *output = type->to_string(*get_root(), row);
         return Status::OK();
     }
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    // TODO avoid copy
-    *output = std::string(buf.GetString(), buf.GetSize());
+    // TODO preallocate memory
+    auto tmp_col = ColumnString::create();
+    VectorBufferWriter write_buffer(*tmp_col.get());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    write_buffer.commit();
+    auto str_ref = tmp_col->get_data_at(0);
+    *output = std::string(str_ref.data, str_ref.size);
     return Status::OK();
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         type->to_string(*get_root(), row, output);
         return Status::OK();
     }
-    rapidjson::StringBuffer buf;
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    output.write(buf.GetString(), buf.GetLength());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
     return Status::OK();
 }
 
-Status ColumnObject::serialize_one_row_to_json_format(int64_t row, 
rapidjson::StringBuffer* output,
-                                                      bool* is_null) const {
-    CHECK(is_finalized());
-    if (subcolumns.empty()) {
-        if (is_null != nullptr) {
-            *is_null = true;
-        } else {
-            rapidjson::Value root(rapidjson::kNullType);
-            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-            if (!root.Accept(writer)) {
-                return Status::InternalError("Failed to serialize json value");
+/// Struct that represents elements of the JSON path.
+/// "a.b.c" -> ["a", "b", "c"]
+struct PathElements {
+    explicit PathElements(const String& path) {
+        const char* start = path.data();
+        const char* end = start + path.size();
+        const char* pos = start;
+        const char* last_dot_pos = pos - 1;
+        for (pos = start; pos != end; ++pos) {
+            if (*pos == '.') {
+                elements.emplace_back(last_dot_pos + 1, size_t(pos - 
last_dot_pos - 1));
+                last_dot_pos = pos;
             }
         }
-        return Status::OK();
+
+        elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 
1));
     }
-    CHECK(size() > row);
-    rapidjson::StringBuffer buffer;
-    rapidjson::Value root(rapidjson::kNullType);
-    if (doc_structure == nullptr) {
-        doc_structure = std::make_shared<rapidjson::Document>();
-        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
-        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+
+    size_t size() const { return elements.size(); }
+
+    std::vector<std::string_view> elements;
+};
+
+/// Struct that represents a prefix of a JSON path. Used during output of the 
JSON object.
+struct Prefix {
+    /// Shrink current prefix to the common prefix of current prefix and 
specified path.
+    /// For example, if current prefix is a.b.c.d and path is a.b.e, then 
shrink the prefix to a.b.
+    void shrink_to_common_prefix(const PathElements& path_elements) {
+        /// Don't include last element in path_elements in the prefix.
+        size_t i = 0;
+        while (i != elements.size() && i != (path_elements.elements.size() - 
1) &&
+               elements[i].first == path_elements.elements[i])
+            ++i;
+        elements.resize(i);
     }
-    if (!doc_structure->IsNull()) {
-        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+
+    /// Check is_first flag in current object.
+    bool is_first_in_current_object() const {
+        if (elements.empty()) return root_is_first_flag;
+        return elements.back().second;
     }
-    Arena mem_pool;
-#ifndef NDEBUG
-    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
-#endif
-    for (const auto& subcolumn : subcolumns) {
-        RETURN_IF_ERROR(find_and_set_leave_value(
-                subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
-                subcolumn->data.get_least_common_type_serde(),
-                subcolumn->data.get_least_common_type(),
-                subcolumn->data.least_common_type.get_base_type_id(), root,
-                doc_structure->GetAllocator(), mem_pool, row));
-        if (subcolumn->path.empty() && !root.IsObject()) {
-            // root was modified, only handle root node
-            break;
-        }
+
+    /// Set flag is_first = false in current object.
+    void set_not_first_in_current_object() {
+        if (elements.empty())
+            root_is_first_flag = false;
+        else
+            elements.back().second = false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           else {
               e
   }lements.back().second = false;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1717,89 +1776,203 @@
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    rapidjson::StringBuffer buf;
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         *output = type->to_string(*get_root(), row);
         return Status::OK();
     }
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    // TODO avoid copy
-    *output = std::string(buf.GetString(), buf.GetSize());
+    // TODO preallocate memory
+    auto tmp_col = ColumnString::create();
+    VectorBufferWriter write_buffer(*tmp_col.get());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    write_buffer.commit();
+    auto str_ref = tmp_col->get_data_at(0);
+    *output = std::string(str_ref.data, str_ref.size);
     return Status::OK();
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         type->to_string(*get_root(), row, output);
         return Status::OK();
     }
-    rapidjson::StringBuffer buf;
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    output.write(buf.GetString(), buf.GetLength());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
     return Status::OK();
 }
 
-Status ColumnObject::serialize_one_row_to_json_format(int64_t row, 
rapidjson::StringBuffer* output,
-                                                      bool* is_null) const {
-    CHECK(is_finalized());
-    if (subcolumns.empty()) {
-        if (is_null != nullptr) {
-            *is_null = true;
-        } else {
-            rapidjson::Value root(rapidjson::kNullType);
-            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-            if (!root.Accept(writer)) {
-                return Status::InternalError("Failed to serialize json value");
+/// Struct that represents elements of the JSON path.
+/// "a.b.c" -> ["a", "b", "c"]
+struct PathElements {
+    explicit PathElements(const String& path) {
+        const char* start = path.data();
+        const char* end = start + path.size();
+        const char* pos = start;
+        const char* last_dot_pos = pos - 1;
+        for (pos = start; pos != end; ++pos) {
+            if (*pos == '.') {
+                elements.emplace_back(last_dot_pos + 1, size_t(pos - 
last_dot_pos - 1));
+                last_dot_pos = pos;
             }
         }
-        return Status::OK();
+
+        elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 
1));
     }
-    CHECK(size() > row);
-    rapidjson::StringBuffer buffer;
-    rapidjson::Value root(rapidjson::kNullType);
-    if (doc_structure == nullptr) {
-        doc_structure = std::make_shared<rapidjson::Document>();
-        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
-        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+
+    size_t size() const { return elements.size(); }
+
+    std::vector<std::string_view> elements;
+};
+
+/// Struct that represents a prefix of a JSON path. Used during output of the 
JSON object.
+struct Prefix {
+    /// Shrink current prefix to the common prefix of current prefix and 
specified path.
+    /// For example, if current prefix is a.b.c.d and path is a.b.e, then 
shrink the prefix to a.b.
+    void shrink_to_common_prefix(const PathElements& path_elements) {
+        /// Don't include last element in path_elements in the prefix.
+        size_t i = 0;
+        while (i != elements.size() && i != (path_elements.elements.size() - 
1) &&
+               elements[i].first == path_elements.elements[i])
+            ++i;
+        elements.resize(i);
     }
-    if (!doc_structure->IsNull()) {
-        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+
+    /// Check is_first flag in current object.
+    bool is_first_in_current_object() const {
+        if (elements.empty()) return root_is_first_flag;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (elements.empty()) { return root_is_first_flag;
   }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1717,89 +1776,203 @@
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    rapidjson::StringBuffer buf;
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         *output = type->to_string(*get_root(), row);
         return Status::OK();
     }
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    // TODO avoid copy
-    *output = std::string(buf.GetString(), buf.GetSize());
+    // TODO preallocate memory
+    auto tmp_col = ColumnString::create();
+    VectorBufferWriter write_buffer(*tmp_col.get());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    write_buffer.commit();
+    auto str_ref = tmp_col->get_data_at(0);
+    *output = std::string(str_ref.data, str_ref.size);
     return Status::OK();
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         type->to_string(*get_root(), row, output);
         return Status::OK();
     }
-    rapidjson::StringBuffer buf;
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    output.write(buf.GetString(), buf.GetLength());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
     return Status::OK();
 }
 
-Status ColumnObject::serialize_one_row_to_json_format(int64_t row, 
rapidjson::StringBuffer* output,
-                                                      bool* is_null) const {
-    CHECK(is_finalized());
-    if (subcolumns.empty()) {
-        if (is_null != nullptr) {
-            *is_null = true;
-        } else {
-            rapidjson::Value root(rapidjson::kNullType);
-            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-            if (!root.Accept(writer)) {
-                return Status::InternalError("Failed to serialize json value");
+/// Struct that represents elements of the JSON path.
+/// "a.b.c" -> ["a", "b", "c"]
+struct PathElements {
+    explicit PathElements(const String& path) {
+        const char* start = path.data();
+        const char* end = start + path.size();
+        const char* pos = start;
+        const char* last_dot_pos = pos - 1;
+        for (pos = start; pos != end; ++pos) {
+            if (*pos == '.') {
+                elements.emplace_back(last_dot_pos + 1, size_t(pos - 
last_dot_pos - 1));
+                last_dot_pos = pos;
             }
         }
-        return Status::OK();
+
+        elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 
1));
     }
-    CHECK(size() > row);
-    rapidjson::StringBuffer buffer;
-    rapidjson::Value root(rapidjson::kNullType);
-    if (doc_structure == nullptr) {
-        doc_structure = std::make_shared<rapidjson::Document>();
-        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
-        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+
+    size_t size() const { return elements.size(); }
+
+    std::vector<std::string_view> elements;
+};
+
+/// Struct that represents a prefix of a JSON path. Used during output of the 
JSON object.
+struct Prefix {
+    /// Shrink current prefix to the common prefix of current prefix and 
specified path.
+    /// For example, if current prefix is a.b.c.d and path is a.b.e, then 
shrink the prefix to a.b.
+    void shrink_to_common_prefix(const PathElements& path_elements) {
+        /// Don't include last element in path_elements in the prefix.
+        size_t i = 0;
+        while (i != elements.size() && i != (path_elements.elements.size() - 
1) &&
+               elements[i].first == path_elements.elements[i])
+            ++i;
+        elements.resize(i);
     }
-    if (!doc_structure->IsNull()) {
-        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+
+    /// Check is_first flag in current object.
+    bool is_first_in_current_object() const {
+        if (elements.empty()) return root_is_first_flag;
+        return elements.back().second;
     }
-    Arena mem_pool;
-#ifndef NDEBUG
-    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
-#endif
-    for (const auto& subcolumn : subcolumns) {
-        RETURN_IF_ERROR(find_and_set_leave_value(
-                subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
-                subcolumn->data.get_least_common_type_serde(),
-                subcolumn->data.get_least_common_type(),
-                subcolumn->data.least_common_type.get_base_type_id(), root,
-                doc_structure->GetAllocator(), mem_pool, row));
-        if (subcolumn->path.empty() && !root.IsObject()) {
-            // root was modified, only handle root node
-            break;
-        }
+
+    /// Set flag is_first = false in current object.
+    void set_not_first_in_current_object() {
+        if (elements.empty())
+            root_is_first_flag = false;
+        else
+            elements.back().second = false;
     }
-    compact_null_values(root, doc_structure->GetAllocator());
-    if (root.IsNull() && is_null != nullptr) {
-        // Fast path
-        *is_null = true;
-    } else {
-        output->Clear();
-        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-        if (!root.Accept(writer)) {
-            return Status::InternalError("Failed to serialize json value");
+
+    size_t size() const { return elements.size(); }
+
+    /// Elements of the prefix: (path element, is_first flag in this prefix).
+    /// is_first flag indicates if we already serialized some key in the 
object with such prefix.
+    std::vector<std::pair<std::string_view, bool>> elements;
+    bool root_is_first_flag = true;
+};
+
+Status ColumnObject::serialize_one_row_to_json_format(int64_t row_num, 
BufferWritable& output,

Review Comment:
   warning: function 'serialize_one_row_to_json_format' exceeds recommended 
size/complexity thresholds [readability-function-size]
   ```cpp
   Status ColumnObject::serialize_one_row_to_json_format(int64_t row_num, 
BufferWritable& output,
                        ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/columns/column_object.cpp:1867:** 108 lines including 
whitespace and comments (threshold 80)
   ```cpp
   Status ColumnObject::serialize_one_row_to_json_format(int64_t row_num, 
BufferWritable& output,
                        ^
   ```
   
   </details>
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -1717,89 +1776,203 @@
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    rapidjson::StringBuffer buf;
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         *output = type->to_string(*get_root(), row);
         return Status::OK();
     }
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    // TODO avoid copy
-    *output = std::string(buf.GetString(), buf.GetSize());
+    // TODO preallocate memory
+    auto tmp_col = ColumnString::create();
+    VectorBufferWriter write_buffer(*tmp_col.get());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    write_buffer.commit();
+    auto str_ref = tmp_col->get_data_at(0);
+    *output = std::string(str_ref.data, str_ref.size);
     return Status::OK();
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    if (!is_finalized()) {
-        const_cast<ColumnObject*>(this)->finalize();
-    }
-    if (is_scalar_variant()) {
+    // if (!is_finalized()) {
+    //     const_cast<ColumnObject*>(this)->finalize();
+    // }
+    if (is_scalar_variant() && is_finalized()) {
         auto type = get_root_type();
         type->to_string(*get_root(), row, output);
         return Status::OK();
     }
-    rapidjson::StringBuffer buf;
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, &buf, nullptr));
-    output.write(buf.GetString(), buf.GetLength());
+    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
     return Status::OK();
 }
 
-Status ColumnObject::serialize_one_row_to_json_format(int64_t row, 
rapidjson::StringBuffer* output,
-                                                      bool* is_null) const {
-    CHECK(is_finalized());
-    if (subcolumns.empty()) {
-        if (is_null != nullptr) {
-            *is_null = true;
-        } else {
-            rapidjson::Value root(rapidjson::kNullType);
-            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
-            if (!root.Accept(writer)) {
-                return Status::InternalError("Failed to serialize json value");
+/// Struct that represents elements of the JSON path.
+/// "a.b.c" -> ["a", "b", "c"]
+struct PathElements {
+    explicit PathElements(const String& path) {
+        const char* start = path.data();
+        const char* end = start + path.size();
+        const char* pos = start;
+        const char* last_dot_pos = pos - 1;
+        for (pos = start; pos != end; ++pos) {
+            if (*pos == '.') {
+                elements.emplace_back(last_dot_pos + 1, size_t(pos - 
last_dot_pos - 1));
+                last_dot_pos = pos;
             }
         }
-        return Status::OK();
+
+        elements.emplace_back(last_dot_pos + 1, size_t(pos - last_dot_pos - 
1));
     }
-    CHECK(size() > row);
-    rapidjson::StringBuffer buffer;
-    rapidjson::Value root(rapidjson::kNullType);
-    if (doc_structure == nullptr) {
-        doc_structure = std::make_shared<rapidjson::Document>();
-        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
-        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+
+    size_t size() const { return elements.size(); }
+
+    std::vector<std::string_view> elements;
+};
+
+/// Struct that represents a prefix of a JSON path. Used during output of the 
JSON object.
+struct Prefix {
+    /// Shrink current prefix to the common prefix of current prefix and 
specified path.
+    /// For example, if current prefix is a.b.c.d and path is a.b.e, then 
shrink the prefix to a.b.
+    void shrink_to_common_prefix(const PathElements& path_elements) {
+        /// Don't include last element in path_elements in the prefix.
+        size_t i = 0;
+        while (i != elements.size() && i != (path_elements.elements.size() - 
1) &&
+               elements[i].first == path_elements.elements[i])
+            ++i;
+        elements.resize(i);
     }
-    if (!doc_structure->IsNull()) {
-        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+
+    /// Check is_first flag in current object.
+    bool is_first_in_current_object() const {
+        if (elements.empty()) return root_is_first_flag;
+        return elements.back().second;
     }
-    Arena mem_pool;
-#ifndef NDEBUG
-    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
-#endif
-    for (const auto& subcolumn : subcolumns) {
-        RETURN_IF_ERROR(find_and_set_leave_value(
-                subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
-                subcolumn->data.get_least_common_type_serde(),
-                subcolumn->data.get_least_common_type(),
-                subcolumn->data.least_common_type.get_base_type_id(), root,
-                doc_structure->GetAllocator(), mem_pool, row));
-        if (subcolumn->path.empty() && !root.IsObject()) {
-            // root was modified, only handle root node
-            break;
-        }
+
+    /// Set flag is_first = false in current object.
+    void set_not_first_in_current_object() {
+        if (elements.empty())

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (elements.empty()) {
   ```
   
   be/src/vec/columns/column_object.cpp:1855:
   ```diff
   -         else
   +         } else
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to