This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 989aa0f4280 fix 4 (#45601)
989aa0f4280 is described below

commit 989aa0f4280270bba78542844af491a07a1d67ca
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Wed Dec 18 19:18:28 2024 +0800

    fix 4 (#45601)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    | 174 ++++++++++-----------
 be/src/olap/rowset/segment_v2/column_reader.h      |  72 ++++-----
 .../rowset/segment_v2/hierarchical_data_reader.cpp |  31 ++--
 .../rowset/segment_v2/hierarchical_data_reader.h   |   1 +
 be/src/olap/rowset/segment_v2/segment.cpp          |  38 +++--
 be/src/olap/rowset/segment_v2/segment.h            |   5 +-
 .../segment_v2/variant_column_writer_impl.cpp      |  15 +-
 be/src/vec/columns/column_object.cpp               |  11 +-
 be/src/vec/columns/column_object.h                 |  14 +-
 be/src/vec/data_types/data_type_object.cpp         |  17 ++
 10 files changed, 194 insertions(+), 184 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 745ff3d93a3..2f303999aea 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -282,18 +282,6 @@ Status VariantColumnReader::init(const 
ColumnReaderOptions& opts, const SegmentF
                                  io::FileReaderSPtr file_reader) {
     // init sub columns
     _subcolumn_readers = std::make_unique<SubcolumnColumnReaders>();
-    std::unordered_map<vectorized::PathInData, uint32_t, 
vectorized::PathInData::Hash>
-            column_path_to_footer_ordinal;
-    for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) {
-        const auto& column_pb = footer.columns(ordinal);
-        // column path for accessing subcolumns of variant
-        if (column_pb.has_column_path_info()) {
-            vectorized::PathInData path;
-            path.from_protobuf(column_pb.column_path_info());
-            column_path_to_footer_ordinal.emplace(path, ordinal);
-        }
-    }
-
     const ColumnMetaPB& self_column_pb = footer.columns(column_id);
     for (const ColumnMetaPB& column_pb : footer.columns()) {
         if (column_pb.unique_id() != self_column_pb.unique_id()) {
@@ -311,23 +299,25 @@ Status VariantColumnReader::init(const 
ColumnReaderOptions& opts, const SegmentF
                                                  &_sparse_column_reader));
             continue;
         }
-        // init subcolumns
         auto relative_path = path.copy_pop_front();
+        auto get_data_type_fn = [&]() {
+            if (relative_path.empty()) {
+                return 
make_nullable(std::make_unique<vectorized::ColumnObject::MostCommonType>());
+            }
+            return 
vectorized::DataTypeFactory::instance().create_data_type(column_pb);
+        };
+        // init subcolumns
         if (_subcolumn_readers->get_root() == nullptr) {
             _subcolumn_readers->create_root(SubcolumnReader {nullptr, 
nullptr});
         }
         if (relative_path.empty()) {
             // root column
-            
_subcolumn_readers->get_mutable_root()->modify_to_scalar(SubcolumnReader {
-                    std::move(reader),
-                    
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
+            _subcolumn_readers->get_mutable_root()->modify_to_scalar(
+                    SubcolumnReader {std::move(reader), get_data_type_fn()});
         } else {
             // check the root is already a leaf node
-            _subcolumn_readers->add(
-                    relative_path,
-                    SubcolumnReader {
-                            std::move(reader),
-                            
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
+            _subcolumn_readers->add(relative_path,
+                                    SubcolumnReader {std::move(reader), 
get_data_type_fn()});
         }
     }
 
@@ -876,7 +866,9 @@ Status ColumnReader::new_iterator(ColumnIterator** 
iterator) {
             return new_map_iterator(iterator);
         }
         case FieldType::OLAP_FIELD_TYPE_VARIANT: {
-            *iterator = new VariantRootColumnIterator(new 
FileColumnIterator(this));
+            // read from root data
+            // *iterator = new VariantRootColumnIterator(new 
FileColumnIterator(this));
+            *iterator = new FileColumnIterator(this);
             return Status::OK();
         }
         default:
@@ -1738,75 +1730,75 @@ void 
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
     }
 }
 
-Status VariantRootColumnIterator::_process_root_column(
-        vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& 
root_column,
-        const vectorized::DataTypePtr& most_common_type) {
-    auto& obj =
-            dst->is_nullable()
-                    ? assert_cast<vectorized::ColumnObject&>(
-                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
-                    : assert_cast<vectorized::ColumnObject&>(*dst);
-
-    // fill nullmap
-    if (root_column->is_nullable() && dst->is_nullable()) {
-        vectorized::ColumnUInt8& dst_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
-        vectorized::ColumnUInt8& src_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
-        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
-    }
-
-    // add root column to a tmp object column
-    auto tmp = vectorized::ColumnObject::create(true, false);
-    auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
-    tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
-
-    // merge tmp object column to dst
-    obj.insert_range_from(*tmp, 0, tmp->size());
-
-    // finalize object if needed
-    if (!obj.is_finalized()) {
-        obj.finalize();
-    }
-
-#ifndef NDEBUG
-    obj.check_consistency();
-#endif
-
-    return Status::OK();
-}
-
-Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
-                                             bool* has_null) {
-    // read root column
-    auto& obj =
-            dst->is_nullable()
-                    ? assert_cast<vectorized::ColumnObject&>(
-                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
-                    : assert_cast<vectorized::ColumnObject&>(*dst);
-
-    auto most_common_type = obj.get_most_common_type();
-    auto root_column = most_common_type->create_column();
-    RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
-
-    return _process_root_column(dst, root_column, most_common_type);
-}
-
-Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const 
size_t count,
-                                                 vectorized::MutableColumnPtr& 
dst) {
-    // read root column
-    auto& obj =
-            dst->is_nullable()
-                    ? assert_cast<vectorized::ColumnObject&>(
-                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
-                    : assert_cast<vectorized::ColumnObject&>(*dst);
-
-    auto most_common_type = obj.get_most_common_type();
-    auto root_column = most_common_type->create_column();
-    RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));
-
-    return _process_root_column(dst, root_column, most_common_type);
-}
+// Status VariantRootColumnIterator::_process_root_column(
+//         vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& 
root_column,
+//         const vectorized::DataTypePtr& most_common_type) {
+//     auto& obj =
+//             dst->is_nullable()
+//                     ? assert_cast<vectorized::ColumnObject&>(
+//                               
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+//                     : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+//     // fill nullmap
+//     if (root_column->is_nullable() && dst->is_nullable()) {
+//         vectorized::ColumnUInt8& dst_null_map =
+//                 
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+//         vectorized::ColumnUInt8& src_null_map =
+//                 
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
+//         dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
+//     }
+//
+//     // add root column to a tmp object column
+//     auto tmp = vectorized::ColumnObject::create(true, false);
+//     auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
+//     tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
+//
+//     // merge tmp object column to dst
+//     obj.insert_range_from(*tmp, 0, tmp_obj.rows());
+//
+//     // finalize object if needed
+//     if (!obj.is_finalized()) {
+//         obj.finalize();
+//     }
+//
+// #ifndef NDEBUG
+//     obj.check_consistency();
+// #endif
+//
+//     return Status::OK();
+// }
+//
+// Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+//                                              bool* has_null) {
+//     // read root column
+//     auto& obj =
+//             dst->is_nullable()
+//                     ? assert_cast<vectorized::ColumnObject&>(
+//                               
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+//                     : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+//     auto most_common_type = obj.get_most_common_type();
+//     auto root_column = most_common_type->create_column();
+//     RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
+//
+//     return _process_root_column(dst, root_column, most_common_type);
+// }
+//
+// Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, 
const size_t count,
+//                                                  
vectorized::MutableColumnPtr& dst) {
+//     // read root column
+//     auto& obj =
+//             dst->is_nullable()
+//                     ? assert_cast<vectorized::ColumnObject&>(
+//                               
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+//                     : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+//     auto most_common_type = obj.get_most_common_type();
+//     auto root_column = most_common_type->create_column();
+//     RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, 
root_column));
+//
+//     return _process_root_column(dst, root_column, most_common_type);
+// }
 
 Status DefaultNestedColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst) {
     bool has_null = false;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h 
b/be/src/olap/rowset/segment_v2/column_reader.h
index d61393e820c..189435c2095 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -216,7 +216,7 @@ public:
 
     void disable_index_meta_cache() { _use_index_page_cache = false; }
 
-    FieldType get_meta_type() { return _meta_type; }
+    virtual FieldType get_meta_type() { return _meta_type; }
 
 private:
     ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, 
uint64_t num_rows,
@@ -309,6 +309,8 @@ public:
 
     ~VariantColumnReader() override = default;
 
+    FieldType get_meta_type() override { return 
FieldType::OLAP_FIELD_TYPE_VARIANT; }
+
 private:
     std::unique_ptr<SubcolumnColumnReaders> _subcolumn_readers;
     std::unique_ptr<ColumnReader> _sparse_column_reader;
@@ -661,40 +663,40 @@ private:
     int32_t _segment_id = 0;
 };
 
-class VariantRootColumnIterator : public ColumnIterator {
-public:
-    VariantRootColumnIterator() = delete;
-
-    explicit VariantRootColumnIterator(FileColumnIterator* iter) { 
_inner_iter.reset(iter); }
-
-    ~VariantRootColumnIterator() override = default;
-
-    Status init(const ColumnIteratorOptions& opts) override { return 
_inner_iter->init(opts); }
-
-    Status seek_to_first() override { return _inner_iter->seek_to_first(); }
-
-    Status seek_to_ordinal(ordinal_t ord_idx) override {
-        return _inner_iter->seek_to_ordinal(ord_idx);
-    }
-
-    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
-        bool has_null;
-        return next_batch(n, dst, &has_null);
-    }
-
-    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
-
-    Status read_by_rowids(const rowid_t* rowids, const size_t count,
-                          vectorized::MutableColumnPtr& dst) override;
-
-    ordinal_t get_current_ordinal() const override { return 
_inner_iter->get_current_ordinal(); }
-
-private:
-    Status _process_root_column(vectorized::MutableColumnPtr& dst,
-                                vectorized::MutableColumnPtr& root_column,
-                                const vectorized::DataTypePtr& 
most_common_type);
-    std::unique_ptr<FileColumnIterator> _inner_iter;
-};
+// class VariantRootColumnIterator : public ColumnIterator {
+// public:
+//     VariantRootColumnIterator() = delete;
+//
+//     explicit VariantRootColumnIterator(FileColumnIterator* iter) { 
_inner_iter.reset(iter); }
+//
+//     ~VariantRootColumnIterator() override = default;
+//
+//     Status init(const ColumnIteratorOptions& opts) override { return 
_inner_iter->init(opts); }
+//
+//     Status seek_to_first() override { return _inner_iter->seek_to_first(); }
+//
+//     Status seek_to_ordinal(ordinal_t ord_idx) override {
+//         return _inner_iter->seek_to_ordinal(ord_idx);
+//     }
+//
+//     Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
+//         bool has_null;
+//         return next_batch(n, dst, &has_null);
+//     }
+//
+//     Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+//
+//     Status read_by_rowids(const rowid_t* rowids, const size_t count,
+//                           vectorized::MutableColumnPtr& dst) override;
+//
+//     ordinal_t get_current_ordinal() const override { return 
_inner_iter->get_current_ordinal(); }
+//
+// private:
+//     Status _process_root_column(vectorized::MutableColumnPtr& dst,
+//                                 vectorized::MutableColumnPtr& root_column,
+//                                 const vectorized::DataTypePtr& 
most_common_type);
+//     std::unique_ptr<FileColumnIterator> _inner_iter;
+// };
 
 // This iterator is used to read default value column
 class DefaultValueColumnIterator : public ColumnIterator {
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 2b8e58d47f1..ca25b230bce 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -239,15 +239,17 @@ Status 
HierarchicalDataReader::_init_container(vectorized::MutableColumnPtr& con
 
     // add root first
     if (_path.get_parts().empty() && _root_reader) {
-        auto& root_var =
-                _root_reader->column->is_nullable()
-                        ? assert_cast<vectorized::ColumnObject&>(
-                                  
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
-                                          .get_nested_column())
-                        : 
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
-        auto column = root_var.get_root();
-        auto type = root_var.get_root_type();
-        container_variant.add_sub_column({}, std::move(column), type);
+        // auto& root_var =
+        //         _root_reader->column->is_nullable()
+        //                 ? assert_cast<vectorized::ColumnObject&>(
+        //                           
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+        //                                   .get_nested_column())
+        //                 : 
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+        // auto column = root_var.get_root();
+        // auto type = root_var.get_root_type();
+        MutableColumnPtr column = _root_reader->column->get_ptr();
+        container_variant.add_sub_column({}, std::move(column),
+                                         ColumnObject::get_most_common_type());
     }
     // parent path -> subcolumns
     std::map<PathInData, PathsWithColumnAndType> nested_subcolumns;
@@ -361,7 +363,9 @@ Status 
HierarchicalDataReader::_init_null_map_and_clear_columns(
         return Status::OK();
     }));
     container->clear();
-    _sparse_column_reader->column->clear();
+    if (_sparse_column_reader) {
+        _sparse_column_reader->column->clear();
+    }
     if (_root_reader) {
         if (_root_reader->column->is_nullable()) {
             // fill nullmap
@@ -372,13 +376,8 @@ Status 
HierarchicalDataReader::_init_null_map_and_clear_columns(
             dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
             // clear nullmap and inner data
             src_null_map.clear();
-            assert_cast<ColumnObject&>(
-                    
assert_cast<ColumnNullable&>(*_root_reader->column).get_nested_column())
-                    .clear_column_data();
-        } else {
-            auto& root_column = 
assert_cast<ColumnObject&>(*_root_reader->column);
-            root_column.clear_column_data();
         }
+        _root_reader->column->clear();
     } else {
         if (dst->is_nullable()) {
             // No nullable info exist in hirearchical data, fill nullmap with 
all none null
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 5d58f666f62..83dab269dfc 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -22,6 +22,7 @@
 #include <unordered_map>
 #include <utility>
 
+#include "common/exception.h"
 #include "common/status.h"
 #include "io/io_common.h"
 #include "olap/field.h"
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 441e839e6ef..ededa493018 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -201,22 +201,23 @@ Status Segment::_open() {
     // 0.01 comes from PrimaryKeyIndexBuilder::init
     _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
 
-    uint32_t ordinal = 0;
-    for (const auto& column_meta : _footer_pb->columns()) {
-        // unique_id < 0 means this column is extracted column from variant
-        if (static_cast<int>(column_meta.unique_id()) >= 0) {
-            _column_id_to_footer_ordinal[column_meta.unique_id()] = ordinal++;
+    // collect variant statistics
+    for (const auto& column_pb : _footer_pb->columns()) {
+        if (column_pb.has_variant_statistics()) {
+            _variant_column_stats.try_emplace(column_pb.unique_id(),
+                                              column_pb.variant_statistics());
         }
     }
+
     return Status::OK();
 }
 
-const ColumnMetaPB* Segment::get_column_meta(int32_t unique_id) const {
-    auto it = _column_id_to_footer_ordinal.find(unique_id);
-    if (it == _column_id_to_footer_ordinal.end()) {
+const VariantStatisticsPB* Segment::get_stats(int32_t unique_id) const {
+    auto it = _variant_column_stats.find(unique_id);
+    if (it == _variant_column_stats.end()) {
         return nullptr;
     }
-    return &_footer_pb->columns(it->second);
+    return &it->second;
 }
 
 Status Segment::_open_inverted_index() {
@@ -570,8 +571,9 @@ Status Segment::healthy_status() {
 vectorized::DataTypePtr Segment::get_data_type_of(const ColumnIdentifier& 
identifier,
                                                   bool read_flat_leaves) const 
{
     // Path has higher priority
-    if (identifier.path != nullptr && !identifier.path->empty()) {
-        auto relative_path = identifier.path->copy_pop_front();
+    auto relative_path = identifier.path != nullptr ? 
identifier.path->copy_pop_front()
+                                                    : vectorized::PathInData();
+    if (!relative_path.empty()) {
         int32_t unique_id =
                 identifier.unique_id > 0 ? identifier.unique_id : 
identifier.parent_unique_id;
         const auto* node = _column_readers.contains(unique_id)
@@ -605,11 +607,17 @@ Status Segment::_create_column_readers_once() {
 }
 
 Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
+    // unique_id -> idx in footer.columns()
+    std::unordered_map<int32_t, uint32_t> column_id_to_footer_ordinal;
+    uint32_t ordinal = 0;
+    for (const auto& column_meta : _footer_pb->columns()) {
+        column_id_to_footer_ordinal.try_emplace(column_meta.unique_id(), 
ordinal++);
+    }
     // init by unique_id
     for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
         const auto& column = _tablet_schema->column(ordinal);
-        auto iter = _column_id_to_footer_ordinal.find(column.unique_id());
-        if (iter == _column_id_to_footer_ordinal.end()) {
+        auto iter = column_id_to_footer_ordinal.find(column.unique_id());
+        if (iter == column_id_to_footer_ordinal.end()) {
             continue;
         }
 
@@ -796,8 +804,8 @@ Status Segment::new_column_iterator(const TabletColumn& 
tablet_column,
     // }
 
     // For compability reason unique_id may less than 0 for variant extracted 
column
-    int32_t unique_id = tablet_column.unique_id() > 0 ? 
tablet_column.unique_id()
-                                                      : 
tablet_column.parent_unique_id();
+    int32_t unique_id = tablet_column.unique_id() >= 0 ? 
tablet_column.unique_id()
+                                                       : 
tablet_column.parent_unique_id();
     // init default iterator
     if (!_column_readers.contains(unique_id)) {
         RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 877f74ae1c3..1c7b9427163 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -208,7 +208,7 @@ public:
 
     const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
 
-    const ColumnMetaPB* get_column_meta(int32_t unique_id) const;
+    const VariantStatisticsPB* get_stats(int32_t unique_id) const;
 
 private:
     DISALLOW_COPY_AND_ASSIGN(Segment);
@@ -288,8 +288,7 @@ private:
 
     int _be_exec_version = BeExecVersionManager::get_newest_version();
     OlapReaderStatistics* _pk_index_load_stats = nullptr;
-    // unique_id -> idx in footer.columns()
-    std::unordered_map<int32_t, uint32_t> _column_id_to_footer_ordinal;
+    std::unordered_map<int32_t, VariantStatisticsPB> _variant_column_stats;
 };
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp 
b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index 5fbb7433e10..a3671f3afd3 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -68,22 +68,19 @@ Status 
VariantColumnWriterImpl::_get_subcolumn_paths_from_stats(std::set<std::st
         RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
                 std::static_pointer_cast<BetaRowset>(reader->rowset()), 
&segment_cache));
         for (const auto& segment : segment_cache.get_segments()) {
-            const auto* column_meta_pb = 
segment->get_column_meta(_tablet_column->unique_id());
-            if (!column_meta_pb) {
+            const VariantStatisticsPB* source_statistics =
+                    segment->get_stats(_tablet_column->unique_id());
+            if (!source_statistics) {
                 continue;
             }
-            if (!column_meta_pb->has_variant_statistics()) {
-                continue;
-            }
-            const VariantStatisticsPB& source_statistics = 
column_meta_pb->variant_statistics();
-            for (const auto& [path, size] : 
source_statistics.subcolumn_non_null_size()) {
+            for (const auto& [path, size] : 
source_statistics->subcolumn_non_null_size()) {
                 auto it = path_to_total_number_of_non_null_values.find(path);
                 if (it == path_to_total_number_of_non_null_values.end()) {
                     it = path_to_total_number_of_non_null_values.emplace(path, 
0).first;
                 }
                 it->second += size;
             }
-            for (const auto& [path, size] : 
source_statistics.sparse_column_non_null_size()) {
+            for (const auto& [path, size] : 
source_statistics->sparse_column_non_null_size()) {
                 auto it = path_to_total_number_of_non_null_values.find(path);
                 if (it == path_to_total_number_of_non_null_values.end()) {
                     it = path_to_total_number_of_non_null_values.emplace(path, 
0).first;
@@ -256,7 +253,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
 }
 
 void VariantStatistics::to_pb(VariantStatisticsPB* stats) const {
-    for (const auto& [path, value] : _sparse_column_non_null_size) {
+    for (const auto& [path, value] : _subcolumns_non_null_size) {
         stats->mutable_subcolumn_non_null_size()->emplace(path.to_string(), 
value);
     }
     for (const auto& [path, value] : _sparse_column_non_null_size) {
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index f234ba7bfa4..eb397e85a32 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -2231,12 +2231,6 @@ void ColumnObject::clear() {
     _prev_positions.clear();
 }
 
-void ColumnObject::create_root() {
-    auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
-                            : std::make_shared<MostCommonType>();
-    add_sub_column({}, type->create_column(), type);
-}
-
 void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& 
column) {
     if (num_rows == 0) {
         num_rows = column->size();
@@ -2244,9 +2238,8 @@ void ColumnObject::create_root(const DataTypePtr& type, 
MutableColumnPtr&& colum
     add_sub_column({}, std::move(column), type);
 }
 
-DataTypePtr ColumnObject::get_most_common_type() const {
-    auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
-                            : std::make_shared<MostCommonType>();
+const DataTypePtr& ColumnObject::get_most_common_type() {
+    static auto type = make_nullable(std::make_shared<MostCommonType>());
     return type;
 }
 
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 86ba60fffce..647516f97cd 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -46,6 +46,7 @@
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_jsonb.h"
+#include "vec/data_types/data_type_map.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/serde/data_type_serde.h"
 #include "vec/io/reader_buffer.h"
@@ -307,15 +308,10 @@ public:
     // ensure root node is a certain type
     void ensure_root_node_type(const DataTypePtr& type);
 
-    // create jsonb root if missing
-    // notice: should only using in VariantRootColumnIterator
-    // since some datastructures(sparse columns are schema on read
-    void create_root();
-
     // create root with type and column if missing
     void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
 
-    DataTypePtr get_most_common_type() const;
+    static const DataTypePtr& get_most_common_type();
 
     // root is null or type nothing
     bool is_null_root() const;
@@ -377,6 +373,12 @@ public:
                                              
vectorized::ColumnArray::ColumnOffsets::create());
     }
 
+    static const DataTypePtr& get_sparse_column_type() {
+        static DataTypePtr type = 
std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(),
+                                                                
std::make_shared<DataTypeString>());
+        return type;
+    }
+
     void set_sparse_column(ColumnPtr column) { serialized_sparse_column = 
column; }
 
     Status finalize(FinalizeMode mode);
diff --git a/be/src/vec/data_types/data_type_object.cpp 
b/be/src/vec/data_types/data_type_object.cpp
index 0c795e542b0..5829554d118 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -30,6 +30,7 @@
 #include <vector>
 
 #include "agent/be_exec_version_manager.h"
+#include "vec/columns/column.h"
 #include "vec/columns/column_object.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/typeid_cast.h"
@@ -84,6 +85,11 @@ int64_t 
DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
         size += sizeof(uint32_t);
     }
 
+    // sparse column
+    // TODO: make this compatible with the sparse column
+    size += 
ColumnObject::get_sparse_column_type()->get_uncompressed_serialized_bytes(
+            *column_object.get_sparse_column(), be_exec_version);
+
     return size;
 }
 
@@ -134,6 +140,11 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
         buf += sizeof(uint32_t);
     }
 
+    // serialize sparse column
+    // TODO: make this compatible with the sparse column
+    buf = 
ColumnObject::get_sparse_column_type()->serialize(*column_object.get_sparse_column(),
 buf,
+                                                            be_exec_version);
+
     return buf;
 }
 
@@ -175,6 +186,12 @@ const char* DataTypeObject::deserialize(const char* buf, 
MutableColumnPtr* colum
         buf += sizeof(uint32_t);
     }
 
+    // deserialize sparse column
+    // TODO: make this compatible with the sparse column
+    MutableColumnPtr sparse_column = 
ColumnObject::get_sparse_column_type()->create_column();
+    buf = ColumnObject::get_sparse_column_type()->deserialize(buf, 
&sparse_column, be_exec_version);
+    column_object->set_sparse_column(std::move(sparse_column));
+
     column_object->finalize();
 #ifndef NDEBUG
     // DCHECK size


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to