This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new f4236bd0807 [Fix](Variant) fix some p0 cases (#50339)
f4236bd0807 is described below

commit f4236bd0807e8d95397acb2d6a5d5e5bc4b8574d
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Thu Apr 24 00:17:41 2025 +0800

    [Fix](Variant) fix some p0 cases (#50339)
    
    1. fix `variant_max_subcolumns_count` in none variant table
    2. fix predicate with variant itself like `where v > 5` and refactor
    `get_data_type_of`
    3. fix DataType.fromCatalogType for variant type in `trivialTypes`
    4. fix some serialize cases
---
 be/src/olap/rowset/segment_v2/segment.cpp          |  202 +--
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |    4 +-
 .../variant_column_writer_reader_test.cpp          | 1326 ++++++++++----------
 .../olap/rowset/variant_with_compaction_test.cpp   |    0
 .../java/org/apache/doris/catalog/OlapTable.java   |    6 +-
 .../org/apache/doris/nereids/types/DataType.java   |   17 +-
 .../java/org/apache/doris/qe/SessionVariable.java  |    2 +-
 .../ddl/create_nestedtypes_with_schemachange.out   |  Bin 3234 -> 3258 bytes
 .../test_modify_reorder_column.out                 |  Bin 1043 -> 1025 bytes
 regression-test/data/variant_p0/load.out           |  Bin 16265 -> 16350 bytes
 regression-test/suites/variant_p0/load.groovy      |    1 +
 11 files changed, 722 insertions(+), 836 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 36c419d5936..785e68eac7d 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -576,38 +576,48 @@ Status Segment::healthy_status() {
 }
 
 // Return the storage datatype of related column to field.
-// Return nullptr meaning no such storage infomation for this column
 vectorized::DataTypePtr Segment::get_data_type_of(const TabletColumn& column,
                                                   bool read_flat_leaves) const 
{
-    // Path has higher priority
-    auto path = column.path_info_ptr();
-    auto relative_path = path != nullptr ? path->copy_pop_front() : 
vectorized::PathInData();
-    if (!relative_path.empty()) {
-        int32_t unique_id = column.unique_id() > 0 ? column.unique_id() : 
column.parent_unique_id();
-        const auto* node = _column_readers.contains(unique_id)
-                                   ? 
((VariantColumnReader*)(_column_readers.at(unique_id).get()))
-                                             
->get_reader_by_path(relative_path)
-                                   : nullptr;
-        if (node) {
-            bool exist_in_sparse = 
((VariantColumnReader*)(_column_readers.at(unique_id).get()))
-                                           
->exist_in_sparse_column(relative_path);
-            if (read_flat_leaves || (node->children.empty() && 
!exist_in_sparse)) {
-                return node->data.file_column_type;
-            }
-        }
-        // missing in storage, treat it using input data type
-        if (read_flat_leaves && !node) {
-            return nullptr;
-        }
-        // it contains children, exist in sparse column or column missing in 
storage, so treat it as variant
-        return column.is_nullable()
-                       ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
-                                 column.variant_max_subcolumns_count()))
-                       : std::make_shared<vectorized::DataTypeObject>(
-                                 column.variant_max_subcolumns_count());
+    const vectorized::PathInDataPtr path = column.path_info_ptr();
+
+    // none variant column
+    if (path == nullptr || path->empty()) {
+        return 
vectorized::DataTypeFactory::instance().create_data_type(column);
     }
-    // TODO support normal column type
-    return nullptr;
+
+    // Path exists, proceed with variant logic.
+    vectorized::PathInData relative_path = path->copy_pop_front();
+    int32_t unique_id = column.unique_id() > 0 ? column.unique_id() : 
column.parent_unique_id();
+
+    // Find the reader for the base variant column.
+    if (!_column_readers.contains(unique_id)) {
+        return 
vectorized::DataTypeFactory::instance().create_data_type(column);
+    }
+
+    const auto* variant_reader =
+            static_cast<const 
VariantColumnReader*>(_column_readers.at(unique_id).get());
+
+    // Find the specific node within the variant structure using the relative 
path.
+    const auto* node = variant_reader->get_reader_by_path(relative_path);
+
+    // Case 1: Node not found for the given path within the variant reader.
+    // If relative_path is empty, it means the original path pointed to the 
root
+    // of the variant column itself. We should return the Variant type.
+    if (node == nullptr || relative_path.empty()) {
+        return 
vectorized::DataTypeFactory::instance().create_data_type(column);
+    }
+
+    bool exist_in_sparse = 
variant_reader->exist_in_sparse_column(relative_path);
+    bool is_physical_leaf = node->children.empty();
+
+    // Condition to return the specific underlying type of the node:
+    // 1. We are reading flat leaves (ignoring hierarchy).
+    // 2. OR It's a leaf in the physical column structure AND it doesn't 
*also* exist
+    //    in the sparse column (meaning it's purely a materialized leaf).
+    if (read_flat_leaves || (is_physical_leaf && !exist_in_sparse)) {
+        return node->data.file_column_type;
+    }
+    return vectorized::DataTypeFactory::instance().create_data_type(column);
 }
 
 Status Segment::_create_column_readers_once(OlapReaderStatistics* stats) {
@@ -652,92 +662,6 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
         _column_readers.emplace(column.unique_id(), std::move(reader));
     }
 
-    // for (const auto& [path, ordinal] : column_path_to_footer_ordinal) {
-    //     const ColumnMetaPB& column_pb = footer.columns(ordinal);
-    //     ColumnReaderOptions opts {
-    //             .kept_in_memory = _tablet_schema->is_in_memory(),
-    //             .be_exec_version = _be_exec_version,
-    //     };
-    //     std::unique_ptr<ColumnReader> reader;
-    //     RETURN_IF_ERROR(
-    //             ColumnReader::create(opts, column_pb, footer.num_rows(), 
_file_reader, &reader));
-    //     int32_t unique_id = column_pb.unique_id();
-    //     auto relative_path = path.copy_pop_front();
-    //     if (_sub_column_tree[unique_id].get_root() == nullptr) {
-    //         _sub_column_tree[unique_id].create_root(SubcolumnReader 
{nullptr, nullptr});
-    //     }
-    //     if (relative_path.empty()) {
-    //         // root column
-    //         
_sub_column_tree[unique_id].get_mutable_root()->modify_to_scalar(SubcolumnReader
 {
-    //                 std::move(reader),
-    //                 
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
-    //     } else {
-    //         // check the root is already a leaf node
-    //         // 
DCHECK(_sub_column_tree[unique_id].get_leaves()[0]->path.empty());
-    //         _sub_column_tree[unique_id].add(
-    //                 relative_path,
-    //                 SubcolumnReader {
-    //                         std::move(reader),
-    //                         
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
-    //     }
-    // }
-
-    // compability reason use tablet schema
-    // init by column path
-    // for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
-    //     const auto& column = _tablet_schema->column(ordinal);
-    //     if (!column.has_path_info()) {
-    //         continue;
-    //     }
-    //     auto path = column.has_path_info() ? *column.path_info_ptr()
-    //                                        : 
vectorized::PathInData(column.name_lower_case());
-    //     auto iter = column_path_to_footer_ordinal.find(path);
-    //     if (iter == column_path_to_footer_ordinal.end()) {
-    //         continue;
-    //     }
-    //     const ColumnMetaPB& column_pb = footer.columns(iter->second);
-    //     ColumnReaderOptions opts {
-    //             .kept_in_memory = _tablet_schema->is_in_memory(),
-    //             .be_exec_version = _be_exec_version,
-    //     };
-    //     std::unique_ptr<ColumnReader> reader;
-    //     RETURN_IF_ERROR(
-    //             ColumnReader::create(opts, column_pb, footer.num_rows(), 
_file_reader, &reader));
-    //     // root column use unique id, leaf column use parent_unique_id
-    //     int32_t unique_id =
-    //             column.parent_unique_id() > 0 ? column.parent_unique_id() : 
column.unique_id();
-    //     auto relative_path = path.copy_pop_front();
-    //     if (relative_path.empty()) {
-    //         // root column
-    //         _sub_column_tree[unique_id].create_root(SubcolumnReader {
-    //                 std::move(reader),
-    //                 
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
-    //     } else {
-    //         // check the root is already a leaf node
-    //         
DCHECK(_sub_column_tree[unique_id].get_leaves()[0]->path.empty());
-    //         _sub_column_tree[unique_id].add(
-    //                 relative_path,
-    //                 SubcolumnReader {
-    //                         std::move(reader),
-    //                         
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
-    //     }
-
-    //     // init sparse columns paths and type info
-    //     for (uint32_t ordinal = 0; ordinal < 
column_pb.sparse_columns().size(); ++ordinal) {
-    //         const auto& spase_column_pb = column_pb.sparse_columns(ordinal);
-    //         if (spase_column_pb.has_column_path_info()) {
-    //             vectorized::PathInData path;
-    //             path.from_protobuf(spase_column_pb.column_path_info());
-    //             // Read from root column, so reader is nullptr
-    //             _sparse_column_tree[unique_id].add(
-    //                     path.copy_pop_front(),
-    //                     SubcolumnReader {nullptr,
-    //                                      
vectorized::DataTypeFactory::instance().create_data_type(
-    //                                              spase_column_pb)});
-    //         }
-    //     }
-    // }
-
     return Status::OK();
 }
 
@@ -761,49 +685,6 @@ Status Segment::new_default_iterator(const TabletColumn& 
tablet_column,
     return Status::OK();
 }
 
-// Status Segment::new_column_iterator_with_path(const TabletColumn& 
tablet_column,
-//                                               
std::unique_ptr<ColumnIterator>* iter,
-//                                               const StorageReadOptions* 
opt) {
-//     // root column use unique id, leaf column use parent_unique_id
-//     int32_t unique_id = tablet_column.unique_id() > 0 ? 
tablet_column.unique_id()
-//                                                       : 
tablet_column.parent_unique_id();
-//     if (!_sub_column_tree.contains(unique_id)) {
-//         // No such variant column in this segment, get a default one
-//         RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
-//         return Status::OK();
-//     }
-//     auto relative_path = tablet_column.path_info_ptr()->copy_pop_front();
-//     const auto* root = _sub_column_tree[unique_id].get_root();
-//     const auto* node = tablet_column.has_path_info()
-//                                ? 
_sub_column_tree[unique_id].find_exact(relative_path)
-//                                : nullptr;
-//
-//     if (node != nullptr) {
-//         if (node->is_leaf_node()) {
-//             // Node contains column without any child sub columns and no 
corresponding sparse columns
-//             // Direct read extracted columns
-//             const auto* node = 
_sub_column_tree[unique_id].find_leaf(relative_path);
-//             ColumnIterator* it;
-//             RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
-//             iter->reset(it);
-//         } else {
-//             // Node contains column with children columns or has 
correspoding sparse columns
-//             // Create reader with hirachical data.
-//             // If sparse column exists or read the full path of variant 
read in MERGE_ROOT, otherwise READ_DIRECT
-//             HierarchicalDataReader::ReadType read_type =
-//                     (relative_path == root->path) ? 
HierarchicalDataReader::ReadType::MERGE_ROOT
-//                                                   : 
HierarchicalDataReader::ReadType::READ_DIRECT;
-//             RETURN_IF_ERROR(
-//                     HierarchicalDataReader::create(iter, relative_path, 
node, root, read_type));
-//         }
-//     } else {
-//         // No such node, read from sparse column
-//         // TODO test if in VariantStatisticsPB.sparse_column_non_null_size, 
otherwise generate a default iterator
-//     }
-//
-//     return Status::OK();
-// }
-
 // Not use cid anymore, for example original table schema is colA int, then 
user do following actions
 // 1.add column b
 // 2. drop column b
@@ -819,11 +700,6 @@ Status Segment::new_column_iterator(const TabletColumn& 
tablet_column,
     }
     RETURN_IF_ERROR(_create_column_readers_once(opt->stats));
 
-    // init column iterator by path info
-    // if (tablet_column.has_path_info() || tablet_column.is_variant_type()) {
-    //     return new_column_iterator_with_path(tablet_column, iter, opt);
-    // }
-
     // For compability reason unique_id may less than 0 for variant extracted 
column
     int32_t unique_id = tablet_column.unique_id() >= 0 ? 
tablet_column.unique_id()
                                                        : 
tablet_column.parent_unique_id();
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 026ff4948ec..5d6763afd2a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -290,7 +290,9 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, 
const TabletColumn& co
     opts.file_writer = _file_writer;
     opts.compression_type = _opts.compression_type;
     opts.footer = &_footer;
-    opts.input_rs_readers = _opts.rowset_ctx->input_rs_readers;
+    if (_opts.rowset_ctx != nullptr) {
+        opts.input_rs_readers = _opts.rowset_ctx->input_rs_readers;
+    }
 
     std::unique_ptr<ColumnWriter> writer;
     RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, 
&writer));
diff --git 
a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 6945065e05a..d549d8f6ae8 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -1,663 +1,663 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gtest/gtest.h"
-#include "olap/rowset/segment_v2/column_reader.h"
-#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
-#include "olap/rowset/segment_v2/variant_column_writer_impl.h"
-#include "olap/storage_engine.h"
-#include "testutil/schema_utils.h"
-#include "testutil/variant_util.h"
-
-using namespace doris::vectorized;
-
-namespace doris {
-
-constexpr static uint32_t MAX_PATH_LEN = 1024;
-constexpr static std::string_view dest_dir = 
"/ut_dir/variant_column_writer_test";
-constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
-
-class VariantColumnWriterReaderTest : public testing::Test {
-public:
-    void SetUp() override {
-        // absolute dir
-        char buffer[MAX_PATH_LEN];
-        EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
-        _current_dir = std::string(buffer);
-        _absolute_dir = _current_dir + std::string(dest_dir);
-        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-        
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
-
-        // tmp dir
-        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-        
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
-        std::vector<StorePath> paths;
-        paths.emplace_back(std::string(tmp_dir), 1024000000);
-        auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
-        Status st = tmp_file_dirs->init();
-        EXPECT_TRUE(st.ok()) << st.to_json();
-        ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
-
-        // storage engine
-        doris::EngineOptions options;
-        auto engine = std::make_unique<StorageEngine>(options);
-        _engine_ref = engine.get();
-        _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
-        static_cast<void>(_data_dir->update_capacity());
-        ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
-    }
-
-    void TearDown() override {
-        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-        _engine_ref = nullptr;
-        ExecEnv::GetInstance()->set_storage_engine(nullptr);
-    }
-
-    VariantColumnWriterReaderTest() = default;
-    ~VariantColumnWriterReaderTest() override = default;
-
-private:
-    TabletSchemaSPtr _tablet_schema = nullptr;
-    StorageEngine* _engine_ref = nullptr;
-    std::unique_ptr<DataDir> _data_dir = nullptr;
-    TabletSharedPtr _tablet = nullptr;
-    std::string _absolute_dir;
-    std::string _current_dir;
-};
-
-void check_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
-    EXPECT_TRUE(column_meta.has_column_path_info());
-    auto path = std::make_shared<vectorized::PathInData>();
-    path->from_protobuf(column_meta.column_path_info());
-    EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-    EXPECT_EQ(column_meta.none_null_size(), 
path_with_size[path->copy_pop_front().get_path()]);
-}
-
-void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& 
path_with_size) {
-    EXPECT_TRUE(column_meta.has_column_path_info());
-    auto path = std::make_shared<vectorized::PathInData>();
-    path->from_protobuf(column_meta.column_path_info());
-    EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-    for (const auto& [path, size] :
-         column_meta.variant_statistics().sparse_column_non_null_size()) {
-        EXPECT_EQ(size, path_with_size[path]);
-    }
-    EXPECT_EQ(path->copy_pop_front().get_path(), "__DORIS_VARIANT_SPARSE__");
-}
-
-TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) {
-    // 1. create tablet_schema
-    TabletSchemaPB schema_pb;
-    schema_pb.set_keys_type(KeysType::DUP_KEYS);
-    SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1");
-    _tablet_schema = std::make_shared<TabletSchema>();
-    _tablet_schema->init_from_pb(schema_pb);
-
-    // 2. create tablet
-    TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-    tablet_meta->_tablet_id = 10000;
-    _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
-
-    EXPECT_TRUE(_tablet->init().ok());
-    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-
-    // 3. create file_writer
-    io::FileWriterPtr file_writer;
-    auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-    auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    // 4. create column_writer
-    SegmentFooterPB footer;
-    ColumnWriterOptions opts;
-    opts.meta = footer.add_columns();
-    opts.compression_type = CompressionTypePB::LZ4;
-    opts.file_writer = file_writer.get();
-    opts.footer = &footer;
-    RowsetWriterContext rowset_ctx;
-    rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-    opts.rowset_ctx = &rowset_ctx;
-    opts.rowset_ctx->tablet_schema = _tablet_schema;
-    TabletColumn column = _tablet_schema->column(0);
-    _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-
-    std::unique_ptr<ColumnWriter> writer;
-    EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
-    EXPECT_TRUE(writer->init().ok());
-    EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-
-    // 5. write data
-    auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
-    auto block = _tablet_schema->create_block();
-    auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
-    std::unordered_map<int, std::string> inserted_jsonstr;
-    auto path_with_size =
-            VariantUtil::fill_object_column_with_test_data(column_object, 
1000, &inserted_jsonstr);
-    olap_data_convertor->add_column_data_convertor(column);
-    olap_data_convertor->set_source_content(&block, 0, 1000);
-    auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-    EXPECT_TRUE(result.ok());
-    EXPECT_TRUE(accessor != nullptr);
-    EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 
1000).ok());
-    st = writer->finish();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_data();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_ordinal_index();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_zone_map();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(file_writer->close().ok());
-    footer.set_num_rows(1000);
-
-    // 6. check footer
-    EXPECT_EQ(footer.columns_size(), 5);
-    auto column_meta = footer.columns(0);
-    EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-
-    for (int i = 1; i < footer.columns_size() - 1; ++i) {
-        auto column_meta = footer.columns(i);
-        check_column_meta(column_meta, path_with_size);
-    }
-    check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
-
-    // 7. check variant reader
-    io::FileReaderSPtr file_reader;
-    st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    ColumnReaderOptions read_opts;
-    std::unique_ptr<ColumnReader> column_reader;
-    st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
-    EXPECT_TRUE(variant_column_reader != nullptr);
-
-    auto subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key0"));
-    EXPECT_TRUE(subcolumn_reader != nullptr);
-    subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key1"));
-    EXPECT_TRUE(subcolumn_reader != nullptr);
-    subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key2"));
-    EXPECT_TRUE(subcolumn_reader != nullptr);
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
-    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
-    auto size = variant_column_reader->get_metadata_size();
-    EXPECT_GT(size, 0);
-
-    // 8. check statistics
-    auto statistics = variant_column_reader->get_stats();
-    for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-        EXPECT_EQ(path_with_size[path], size);
-    }
-    for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
-        EXPECT_EQ(path_with_size[path], size);
-    }
-
-    // 9. check hier reader
-    ColumnIterator* it;
-    TabletColumn parent_column = _tablet_schema->column(0);
-    StorageReadOptions storage_read_opts;
-    storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-    ColumnIteratorOptions column_iter_opts;
-    OlapReaderStatistics stats;
-    column_iter_opts.stats = &stats;
-    column_iter_opts.file_reader = file_reader.get();
-    st = it->init(column_iter_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    MutableColumnPtr new_column_object = ColumnObject::create(3);
-    size_t nrows = 1000;
-    st = it->seek_to_ordinal(0);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = it->next_batch(&nrows, new_column_object);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(stats.bytes_read > 0);
-
-    for (int i = 0; i < 1000; ++i) {
-        std::string value;
-        st = assert_cast<ColumnObject*>(new_column_object.get())
-                     ->serialize_one_row_to_string(i, &value);
-
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_EQ(value, inserted_jsonstr[i]);
-    }
-
-    std::vector<rowid_t> row_ids;
-    for (int i = 0; i < 1000; ++i) {
-        if (i % 7 == 0) {
-            row_ids.push_back(i);
-        }
-    }
-    new_column_object = ColumnObject::create(3);
-    st = it->read_by_rowids(row_ids.data(), row_ids.size(), new_column_object);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    for (int i = 0; i < row_ids.size(); ++i) {
-        std::string value;
-        st = assert_cast<ColumnObject*>(new_column_object.get())
-                     ->serialize_one_row_to_string(i, &value);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
-    }
-
-    auto read_to_column_object = [&]() {
-        new_column_object = ColumnObject::create(3);
-        nrows = 1000;
-        st = it->seek_to_ordinal(0);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        st = it->next_batch(&nrows, new_column_object);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_TRUE(stats.bytes_read > 0);
-        EXPECT_EQ(nrows, 1000);
-    };
-
-    // 10. check sparse extract reader
-    for (int i = 3; i < 10; ++i) {
-        std::string key = ".key" + std::to_string(i);
-        TabletColumn subcolumn_in_sparse;
-        subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
-        subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-        subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
-        
subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
-        subcolumn_in_sparse.set_variant_max_subcolumns_count(
-                parent_column.variant_max_subcolumns_count());
-        subcolumn_in_sparse.set_is_nullable(true);
-
-        st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse, 
&storage_read_opts);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-        st = it->init(column_iter_opts);
-        EXPECT_TRUE(st.ok()) << st.msg();
-
-        read_to_column_object();
-
-        for (int row = 0; row < 1000; ++row) {
-            std::string value;
-            st = assert_cast<ColumnObject*>(new_column_object.get())
-                         ->serialize_one_row_to_string(row, &value);
-            EXPECT_TRUE(st.ok()) << st.msg();
-            if (inserted_jsonstr[row].find(key) != std::string::npos) {
-                if (i % 2 == 0) {
-                    EXPECT_EQ(value, "88");
-                } else {
-                    EXPECT_EQ(value, "str99");
-                }
-            }
-        }
-    }
-
-    // 11. check leaf reader
-    auto check_leaf_reader = [&]() {
-        for (int i = 0; i < 3; ++i) {
-            std::string key = ".key" + std::to_string(i);
-            TabletColumn subcolumn;
-            subcolumn.set_name(parent_column.name_lower_case() + key);
-            subcolumn.set_type((FieldType)(int)footer.columns(i + 1).type());
-            subcolumn.set_parent_unique_id(parent_column.unique_id());
-            subcolumn.set_path_info(PathInData(parent_column.name_lower_case() 
+ key));
-            subcolumn.set_variant_max_subcolumns_count(
-                    parent_column.variant_max_subcolumns_count());
-            subcolumn.set_is_nullable(true);
-
-            st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-            EXPECT_TRUE(st.ok()) << st.msg();
-            EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
-            st = it->init(column_iter_opts);
-            EXPECT_TRUE(st.ok()) << st.msg();
-
-            auto column_type = 
DataTypeFactory::instance().create_data_type(subcolumn, false);
-            auto read_column = column_type->create_column();
-            nrows = 1000;
-            st = it->seek_to_ordinal(0);
-            EXPECT_TRUE(st.ok()) << st.msg();
-            st = it->next_batch(&nrows, read_column);
-            EXPECT_TRUE(st.ok()) << st.msg();
-            EXPECT_TRUE(stats.bytes_read > 0);
-
-            for (int row = 0; row < 1000; ++row) {
-                const std::string& value = 
column_type->to_string(*read_column, row);
-                if (inserted_jsonstr[row].find(key) != std::string::npos) {
-                    if (i % 2 == 0) {
-                        EXPECT_EQ(value, "88");
-                    } else {
-                        EXPECT_EQ(value, "str99");
-                    }
-                }
-            }
-        }
-    };
-    check_leaf_reader();
-
-    // 12. check empty
-    TabletColumn subcolumn;
-    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-    subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-    subcolumn.set_parent_unique_id(parent_column.unique_id());
-    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-    subcolumn.set_is_nullable(true);
-    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-
-    // 13. check statistics size == limit
-    auto& variant_stats = variant_column_reader->_statistics;
-    EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
-                config::variant_max_sparse_column_statistics_size);
-    auto limit = config::variant_max_sparse_column_statistics_size -
-                 variant_stats->sparse_column_non_null_size.size();
-    for (int i = 0; i < limit; ++i) {
-        std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
-        variant_stats->sparse_column_non_null_size[key] = 10000;
-    }
-    EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
-                config::variant_max_sparse_column_statistics_size);
-
-    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-    st = it->init(column_iter_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    auto check_empty_column = [&]() {
-        for (int row = 0; row < 1000; ++row) {
-            std::string value;
-            st = assert_cast<ColumnObject*>(new_column_object.get())
-                         ->serialize_one_row_to_string(row, &value);
-
-            EXPECT_TRUE(st.ok()) << st.msg();
-            EXPECT_EQ(value, "{}");
-        }
-    };
-
-    read_to_column_object();
-    check_empty_column();
-
-    // construct tablet schema for compaction
-    storage_read_opts.io_ctx.reader_type = ReaderType::READER_BASE_COMPACTION;
-    storage_read_opts.tablet_schema = _tablet_schema;
-    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> 
uid_to_paths_set_info;
-    TabletSchema::PathsSetInfo paths_set_info;
-    paths_set_info.sub_path_set.insert("key0");
-    paths_set_info.sub_path_set.insert("key3");
-    paths_set_info.sub_path_set.insert("key4");
-    paths_set_info.sparse_path_set.insert("key1");
-    paths_set_info.sparse_path_set.insert("key2");
-    paths_set_info.sparse_path_set.insert("key5");
-    paths_set_info.sparse_path_set.insert("key6");
-    paths_set_info.sparse_path_set.insert("key7");
-    paths_set_info.sparse_path_set.insert("key8");
-    paths_set_info.sparse_path_set.insert("key9");
-    uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
-    _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
-
-    // 14. check compaction subcolumn reader
-    check_leaf_reader();
-
-    // 15. check compaction root reader
-    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
-    st = it->init(column_iter_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    // 16. check compacton sparse column
-    TabletColumn sparse_column = 
schema_util::create_sparse_column(parent_column);
-    st = variant_column_reader->new_iterator(&it, sparse_column, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
-    st = it->init(column_iter_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    auto column_type = 
DataTypeFactory::instance().create_data_type(sparse_column, false);
-    auto read_column = column_type->create_column();
-    nrows = 1000;
-    st = it->seek_to_ordinal(0);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = it->next_batch(&nrows, read_column);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(stats.bytes_read > 0);
-
-    for (int row = 0; row < 1000; ++row) {
-        const std::string& value = column_type->to_string(*read_column, row);
-        EXPECT_TRUE(value.find("key0") == std::string::npos)
-                << "row: " << row << ", value: " << value;
-        EXPECT_TRUE(value.find("key3") == std::string::npos)
-                << "row: " << row << ", value: " << value;
-        EXPECT_TRUE(value.find("key4") == std::string::npos)
-                << "row: " << row << ", value: " << value;
-    }
-
-    // 17. check limit = 10000
-    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-
-    for (int i = 0; i < limit; ++i) {
-        std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
-        variant_stats->sparse_column_non_null_size.erase(key);
-    }
-
-    // 18. check compacton sparse extract column
-    subcolumn.set_name(parent_column.name_lower_case() + ".key3");
-    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key3"));
-    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-
-    // 19. check compaction default column
-    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-}
-
-TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) {
-    // 1. create tablet_schema
-    TabletSchemaPB schema_pb;
-    schema_pb.set_keys_type(KeysType::DUP_KEYS);
-    SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1", 
10);
-    _tablet_schema = std::make_shared<TabletSchema>();
-    _tablet_schema->init_from_pb(schema_pb);
-
-    // 2. create tablet
-    TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-    tablet_meta->_tablet_id = 10000;
-    _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
-    EXPECT_TRUE(_tablet->init().ok());
-    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-
-    // 3. create file_writer
-    io::FileWriterPtr file_writer;
-    auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-    auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    // 4. create column_writer
-    SegmentFooterPB footer;
-    ColumnWriterOptions opts;
-    opts.meta = footer.add_columns();
-    opts.compression_type = CompressionTypePB::LZ4;
-    opts.file_writer = file_writer.get();
-    opts.footer = &footer;
-    RowsetWriterContext rowset_ctx;
-    rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-    opts.rowset_ctx = &rowset_ctx;
-    opts.rowset_ctx->tablet_schema = _tablet_schema;
-    TabletColumn column = _tablet_schema->column(0);
-    _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-
-    std::unique_ptr<ColumnWriter> writer;
-    EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
-    EXPECT_TRUE(writer->init().ok());
-    EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-
-    // 5. write data
-    auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
-    auto block = _tablet_schema->create_block();
-    auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
-    std::unordered_map<int, std::string> inserted_jsonstr;
-    auto path_with_size = 
VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
-                                                                               
 &inserted_jsonstr);
-    olap_data_convertor->add_column_data_convertor(column);
-    olap_data_convertor->set_source_content(&block, 0, 1000);
-    auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-    EXPECT_TRUE(result.ok());
-    EXPECT_TRUE(accessor != nullptr);
-    EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 
1000).ok());
-    st = writer->finish();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_data();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_ordinal_index();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = writer->write_zone_map();
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(file_writer->close().ok());
-    footer.set_num_rows(1000);
-
-    // 6. check footer
-    EXPECT_EQ(footer.columns_size(), 12);
-    auto column_meta = footer.columns(0);
-    EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-
-    for (int i = 1; i < footer.columns_size() - 1; ++i) {
-        auto column_meta = footer.columns(i);
-        check_column_meta(column_meta, path_with_size);
-    }
-    check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
-
-    // 7. check variant reader
-    io::FileReaderSPtr file_reader;
-    st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    ColumnReaderOptions read_opts;
-    std::unique_ptr<ColumnReader> column_reader;
-    st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
-    EXPECT_TRUE(variant_column_reader != nullptr);
-
-    // 8. check statistics
-    auto statistics = variant_column_reader->get_stats();
-    for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-        std::cout << "path: " << path << ", size: " << size << std::endl;
-        EXPECT_EQ(path_with_size[path], size);
-    }
-    for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
-        std::cout << "sparse path: " << path << ", size: " << size << 
std::endl;
-        EXPECT_EQ(path_with_size[path], size);
-    }
-
-    // 9. check root
-    ColumnIterator* it;
-    TabletColumn parent_column = _tablet_schema->column(0);
-    StorageReadOptions storage_read_opts;
-    storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-    ColumnIteratorOptions column_iter_opts;
-    OlapReaderStatistics stats;
-    column_iter_opts.stats = &stats;
-    column_iter_opts.file_reader = file_reader.get();
-    st = it->init(column_iter_opts);
-    EXPECT_TRUE(st.ok()) << st.msg();
-
-    MutableColumnPtr new_column_object = ColumnObject::create(3);
-    size_t nrows = 1000;
-    st = it->seek_to_ordinal(0);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    st = it->next_batch(&nrows, new_column_object);
-    EXPECT_TRUE(st.ok()) << st.msg();
-    EXPECT_TRUE(stats.bytes_read > 0);
-
-    for (int i = 0; i < 1000; ++i) {
-        std::string value;
-        st = assert_cast<ColumnObject*>(new_column_object.get())
-                     ->serialize_one_row_to_string(i, &value);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_EQ(value, inserted_jsonstr[i]);
-    }
-
-    auto read_to_column_object = [&]() {
-        new_column_object = ColumnObject::create(10);
-        nrows = 1000;
-        st = it->seek_to_ordinal(0);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        st = it->next_batch(&nrows, new_column_object);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_TRUE(stats.bytes_read > 0);
-        EXPECT_EQ(nrows, 1000);
-    };
-
-    auto check_key_stats = [&](const std::string& key_num) {
-        std::string key = ".key" + key_num;
-        TabletColumn subcolumn_in_nested;
-        subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
-        subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-        subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
-        
subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
-        subcolumn_in_nested.set_variant_max_subcolumns_count(
-                parent_column.variant_max_subcolumns_count());
-        subcolumn_in_nested.set_is_nullable(true);
-
-        st = variant_column_reader->new_iterator(&it, subcolumn_in_nested, 
&storage_read_opts);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-        st = it->init(column_iter_opts);
-        EXPECT_TRUE(st.ok()) << st.msg();
-        read_to_column_object();
-
-        size_t key_count = 0;
-        size_t key_nested_count = 0;
-        for (int row = 0; row < 1000; ++row) {
-            std::string value;
-            st = assert_cast<ColumnObject*>(new_column_object.get())
-                         ->serialize_one_row_to_string(row, &value);
-            EXPECT_TRUE(st.ok()) << st.msg();
-            if (value.find("nested" + key_num) != std::string::npos) {
-                key_nested_count++;
-            } else if (value.find("88") != std::string::npos) {
-                key_count++;
-            }
-        }
-        EXPECT_EQ(key_count, path_with_size["key" + key_num]);
-        EXPECT_EQ(key_nested_count, path_with_size["key" + key_num + ".nested" 
+ key_num]);
-    };
-
-    for (int i = 3; i < 10; ++i) {
-        check_key_stats(std::to_string(i));
-    }
-
-    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-}
-
-} // namespace doris
\ No newline at end of file
+// // Licensed to the Apache Software Foundation (ASF) under one
+// // or more contributor license agreements.  See the NOTICE file
+// // distributed with this work for additional information
+// // regarding copyright ownership.  The ASF licenses this file
+// // to you under the Apache License, Version 2.0 (the
+// // "License"); you may not use this file except in compliance
+// // with the License.  You may obtain a copy of the License at
+// //
+// //   http://www.apache.org/licenses/LICENSE-2.0
+// //
+// // Unless required by applicable law or agreed to in writing,
+// // software distributed under the License is distributed on an
+// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// // KIND, either express or implied.  See the License for the
+// // specific language governing permissions and limitations
+// // under the License.
+// 
+// #include "gtest/gtest.h"
+// #include "olap/rowset/segment_v2/column_reader.h"
+// #include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+// #include "olap/rowset/segment_v2/variant_column_writer_impl.h"
+// #include "olap/storage_engine.h"
+// #include "testutil/schema_utils.h"
+// #include "testutil/variant_util.h"
+// 
+// using namespace doris::vectorized;
+// 
+// namespace doris {
+// 
+// constexpr static uint32_t MAX_PATH_LEN = 1024;
+// constexpr static std::string_view dest_dir = 
"/ut_dir/variant_column_writer_test";
+// constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
+// 
+// class VariantColumnWriterReaderTest : public testing::Test {
+// public:
+//     void SetUp() override {
+//         // absolute dir
+//         char buffer[MAX_PATH_LEN];
+//         EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+//         _current_dir = std::string(buffer);
+//         _absolute_dir = _current_dir + std::string(dest_dir);
+//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+//         
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
+// 
+//         // tmp dir
+//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+//         
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
+//         std::vector<StorePath> paths;
+//         paths.emplace_back(std::string(tmp_dir), 1024000000);
+//         auto tmp_file_dirs = 
std::make_unique<segment_v2::TmpFileDirs>(paths);
+//         Status st = tmp_file_dirs->init();
+//         EXPECT_TRUE(st.ok()) << st.to_json();
+//         ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+// 
+//         // storage engine
+//         doris::EngineOptions options;
+//         auto engine = std::make_unique<StorageEngine>(options);
+//         _engine_ref = engine.get();
+//         _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
+//         static_cast<void>(_data_dir->update_capacity());
+//         ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
+//     }
+// 
+//     void TearDown() override {
+//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+//         _engine_ref = nullptr;
+//         ExecEnv::GetInstance()->set_storage_engine(nullptr);
+//     }
+// 
+//     VariantColumnWriterReaderTest() = default;
+//     ~VariantColumnWriterReaderTest() override = default;
+// 
+// private:
+//     TabletSchemaSPtr _tablet_schema = nullptr;
+//     StorageEngine* _engine_ref = nullptr;
+//     std::unique_ptr<DataDir> _data_dir = nullptr;
+//     TabletSharedPtr _tablet = nullptr;
+//     std::string _absolute_dir;
+//     std::string _current_dir;
+// };
+// 
+// void check_column_meta(const ColumnMetaPB& column_meta, auto& 
path_with_size) {
+//     EXPECT_TRUE(column_meta.has_column_path_info());
+//     auto path = std::make_shared<vectorized::PathInData>();
+//     path->from_protobuf(column_meta.column_path_info());
+//     EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
+//     EXPECT_EQ(column_meta.none_null_size(), 
path_with_size[path->copy_pop_front().get_path()]);
+// }
+// 
+// void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& 
path_with_size) {
+//     EXPECT_TRUE(column_meta.has_column_path_info());
+//     auto path = std::make_shared<vectorized::PathInData>();
+//     path->from_protobuf(column_meta.column_path_info());
+//     EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
+//     for (const auto& [path, size] :
+//          column_meta.variant_statistics().sparse_column_non_null_size()) {
+//         EXPECT_EQ(size, path_with_size[path]);
+//     }
+//     EXPECT_EQ(path->copy_pop_front().get_path(), 
"__DORIS_VARIANT_SPARSE__");
+// }
+// 
+// TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) {
+//     // 1. create tablet_schema
+//     TabletSchemaPB schema_pb;
+//     schema_pb.set_keys_type(KeysType::DUP_KEYS);
+//     SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", 
"V1");
+//     _tablet_schema = std::make_shared<TabletSchema>();
+//     _tablet_schema->init_from_pb(schema_pb);
+// 
+//     // 2. create tablet
+//     TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+//     tablet_meta->_tablet_id = 10000;
+//     _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
+// 
+//     EXPECT_TRUE(_tablet->init().ok());
+//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+//     
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+// 
+//     // 3. create file_writer
+//     io::FileWriterPtr file_writer;
+//     auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+//     auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     // 4. create column_writer
+//     SegmentFooterPB footer;
+//     ColumnWriterOptions opts;
+//     opts.meta = footer.add_columns();
+//     opts.compression_type = CompressionTypePB::LZ4;
+//     opts.file_writer = file_writer.get();
+//     opts.footer = &footer;
+//     RowsetWriterContext rowset_ctx;
+//     rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+//     opts.rowset_ctx = &rowset_ctx;
+//     opts.rowset_ctx->tablet_schema = _tablet_schema;
+//     TabletColumn column = _tablet_schema->column(0);
+//     _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+// 
+//     std::unique_ptr<ColumnWriter> writer;
+//     EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
+//     EXPECT_TRUE(writer->init().ok());
+//     EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+// 
+//     // 5. write data
+//     auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
+//     auto block = _tablet_schema->create_block();
+//     auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
+//     std::unordered_map<int, std::string> inserted_jsonstr;
+//     auto path_with_size =
+//             VariantUtil::fill_object_column_with_test_data(column_object, 
1000, &inserted_jsonstr);
+//     olap_data_convertor->add_column_data_convertor(column);
+//     olap_data_convertor->set_source_content(&block, 0, 1000);
+//     auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+//     EXPECT_TRUE(result.ok());
+//     EXPECT_TRUE(accessor != nullptr);
+//     EXPECT_TRUE(writer->append(accessor->get_nullmap(), 
accessor->get_data(), 1000).ok());
+//     st = writer->finish();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_data();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_ordinal_index();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_zone_map();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(file_writer->close().ok());
+//     footer.set_num_rows(1000);
+// 
+//     // 6. check footer
+//     EXPECT_EQ(footer.columns_size(), 5);
+//     auto column_meta = footer.columns(0);
+//     EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+// 
+//     for (int i = 1; i < footer.columns_size() - 1; ++i) {
+//         auto column_meta = footer.columns(i);
+//         check_column_meta(column_meta, path_with_size);
+//     }
+//     check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
+// 
+//     // 7. check variant reader
+//     io::FileReaderSPtr file_reader;
+//     st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     ColumnReaderOptions read_opts;
+//     std::unique_ptr<ColumnReader> column_reader;
+//     st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
+//     EXPECT_TRUE(variant_column_reader != nullptr);
+// 
+//     auto subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key0"));
+//     EXPECT_TRUE(subcolumn_reader != nullptr);
+//     subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key1"));
+//     EXPECT_TRUE(subcolumn_reader != nullptr);
+//     subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key2"));
+//     EXPECT_TRUE(subcolumn_reader != nullptr);
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
+//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
+//     auto size = variant_column_reader->get_metadata_size();
+//     EXPECT_GT(size, 0);
+// 
+//     // 8. check statistics
+//     auto statistics = variant_column_reader->get_stats();
+//     for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+//         EXPECT_EQ(path_with_size[path], size);
+//     }
+//     for (const auto& [path, size] : 
statistics->sparse_column_non_null_size) {
+//         EXPECT_EQ(path_with_size[path], size);
+//     }
+// 
+//     // 9. check hier reader
+//     ColumnIterator* it;
+//     TabletColumn parent_column = _tablet_schema->column(0);
+//     StorageReadOptions storage_read_opts;
+//     storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+//     ColumnIteratorOptions column_iter_opts;
+//     OlapReaderStatistics stats;
+//     column_iter_opts.stats = &stats;
+//     column_iter_opts.file_reader = file_reader.get();
+//     st = it->init(column_iter_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     MutableColumnPtr new_column_object = ColumnObject::create(3);
+//     size_t nrows = 1000;
+//     st = it->seek_to_ordinal(0);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = it->next_batch(&nrows, new_column_object);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(stats.bytes_read > 0);
+// 
+//     for (int i = 0; i < 1000; ++i) {
+//         std::string value;
+//         st = assert_cast<ColumnObject*>(new_column_object.get())
+//                      ->serialize_one_row_to_string(i, &value);
+// 
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_EQ(value, inserted_jsonstr[i]);
+//     }
+// 
+//     std::vector<rowid_t> row_ids;
+//     for (int i = 0; i < 1000; ++i) {
+//         if (i % 7 == 0) {
+//             row_ids.push_back(i);
+//         }
+//     }
+//     new_column_object = ColumnObject::create(3);
+//     st = it->read_by_rowids(row_ids.data(), row_ids.size(), 
new_column_object);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     for (int i = 0; i < row_ids.size(); ++i) {
+//         std::string value;
+//         st = assert_cast<ColumnObject*>(new_column_object.get())
+//                      ->serialize_one_row_to_string(i, &value);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
+//     }
+// 
+//     auto read_to_column_object = [&]() {
+//         new_column_object = ColumnObject::create(3);
+//         nrows = 1000;
+//         st = it->seek_to_ordinal(0);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         st = it->next_batch(&nrows, new_column_object);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_TRUE(stats.bytes_read > 0);
+//         EXPECT_EQ(nrows, 1000);
+//     };
+// 
+//     // 10. check sparse extract reader
+//     for (int i = 3; i < 10; ++i) {
+//         std::string key = ".key" + std::to_string(i);
+//         TabletColumn subcolumn_in_sparse;
+//         subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
+//         subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+//         subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
+//         
subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
+//         subcolumn_in_sparse.set_variant_max_subcolumns_count(
+//                 parent_column.variant_max_subcolumns_count());
+//         subcolumn_in_sparse.set_is_nullable(true);
+// 
+//         st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse, 
&storage_read_opts);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+//         st = it->init(column_iter_opts);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//         read_to_column_object();
+// 
+//         for (int row = 0; row < 1000; ++row) {
+//             std::string value;
+//             st = assert_cast<ColumnObject*>(new_column_object.get())
+//                          ->serialize_one_row_to_string(row, &value);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             if (inserted_jsonstr[row].find(key) != std::string::npos) {
+//                 if (i % 2 == 0) {
+//                     EXPECT_EQ(value, "88");
+//                 } else {
+//                     EXPECT_EQ(value, "str99");
+//                 }
+//             }
+//         }
+//     }
+// 
+//     // 11. check leaf reader
+//     auto check_leaf_reader = [&]() {
+//         for (int i = 0; i < 3; ++i) {
+//             std::string key = ".key" + std::to_string(i);
+//             TabletColumn subcolumn;
+//             subcolumn.set_name(parent_column.name_lower_case() + key);
+//             subcolumn.set_type((FieldType)(int)footer.columns(i + 
1).type());
+//             subcolumn.set_parent_unique_id(parent_column.unique_id());
+//             
subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + key));
+//             subcolumn.set_variant_max_subcolumns_count(
+//                     parent_column.variant_max_subcolumns_count());
+//             subcolumn.set_is_nullable(true);
+// 
+//             st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
+//             st = it->init(column_iter_opts);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//             auto column_type = 
DataTypeFactory::instance().create_data_type(subcolumn, false);
+//             auto read_column = column_type->create_column();
+//             nrows = 1000;
+//             st = it->seek_to_ordinal(0);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             st = it->next_batch(&nrows, read_column);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             EXPECT_TRUE(stats.bytes_read > 0);
+// 
+//             for (int row = 0; row < 1000; ++row) {
+//                 const std::string& value = 
column_type->to_string(*read_column, row);
+//                 if (inserted_jsonstr[row].find(key) != std::string::npos) {
+//                     if (i % 2 == 0) {
+//                         EXPECT_EQ(value, "88");
+//                     } else {
+//                         EXPECT_EQ(value, "str99");
+//                     }
+//                 }
+//             }
+//         }
+//     };
+//     check_leaf_reader();
+// 
+//     // 12. check empty
+//     TabletColumn subcolumn;
+//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+//     subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+//     subcolumn.set_parent_unique_id(parent_column.unique_id());
+//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+//     subcolumn.set_is_nullable(true);
+//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+// 
+//     // 13. check statistics size == limit
+//     auto& variant_stats = variant_column_reader->_statistics;
+//     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
+//                 config::variant_max_sparse_column_statistics_size);
+//     auto limit = config::variant_max_sparse_column_statistics_size -
+//                  variant_stats->sparse_column_non_null_size.size();
+//     for (int i = 0; i < limit; ++i) {
+//         std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
+//         variant_stats->sparse_column_non_null_size[key] = 10000;
+//     }
+//     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
+//                 config::variant_max_sparse_column_statistics_size);
+// 
+//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+//     st = it->init(column_iter_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     auto check_empty_column = [&]() {
+//         for (int row = 0; row < 1000; ++row) {
+//             std::string value;
+//             st = assert_cast<ColumnObject*>(new_column_object.get())
+//                          ->serialize_one_row_to_string(row, &value);
+// 
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             EXPECT_EQ(value, "{}");
+//         }
+//     };
+// 
+//     read_to_column_object();
+//     check_empty_column();
+// 
+//     // construct tablet schema for compaction
+//     storage_read_opts.io_ctx.reader_type = 
ReaderType::READER_BASE_COMPACTION;
+//     storage_read_opts.tablet_schema = _tablet_schema;
+//     std::unordered_map<int32_t, TabletSchema::PathsSetInfo> 
uid_to_paths_set_info;
+//     TabletSchema::PathsSetInfo paths_set_info;
+//     paths_set_info.sub_path_set.insert("key0");
+//     paths_set_info.sub_path_set.insert("key3");
+//     paths_set_info.sub_path_set.insert("key4");
+//     paths_set_info.sparse_path_set.insert("key1");
+//     paths_set_info.sparse_path_set.insert("key2");
+//     paths_set_info.sparse_path_set.insert("key5");
+//     paths_set_info.sparse_path_set.insert("key6");
+//     paths_set_info.sparse_path_set.insert("key7");
+//     paths_set_info.sparse_path_set.insert("key8");
+//     paths_set_info.sparse_path_set.insert("key9");
+//     uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
+//     _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
+// 
+//     // 14. check compaction subcolumn reader
+//     check_leaf_reader();
+// 
+//     // 15. check compaction root reader
+//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
+//     st = it->init(column_iter_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     // 16. check compacton sparse column
+//     TabletColumn sparse_column = 
schema_util::create_sparse_column(parent_column);
+//     st = variant_column_reader->new_iterator(&it, sparse_column, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
+//     st = it->init(column_iter_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     auto column_type = 
DataTypeFactory::instance().create_data_type(sparse_column, false);
+//     auto read_column = column_type->create_column();
+//     nrows = 1000;
+//     st = it->seek_to_ordinal(0);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = it->next_batch(&nrows, read_column);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(stats.bytes_read > 0);
+// 
+//     for (int row = 0; row < 1000; ++row) {
+//         const std::string& value = column_type->to_string(*read_column, 
row);
+//         EXPECT_TRUE(value.find("key0") == std::string::npos)
+//                 << "row: " << row << ", value: " << value;
+//         EXPECT_TRUE(value.find("key3") == std::string::npos)
+//                 << "row: " << row << ", value: " << value;
+//         EXPECT_TRUE(value.find("key4") == std::string::npos)
+//                 << "row: " << row << ", value: " << value;
+//     }
+// 
+//     // 17. check limit = 10000
+//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+// 
+//     for (int i = 0; i < limit; ++i) {
+//         std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
+//         variant_stats->sparse_column_non_null_size.erase(key);
+//     }
+// 
+//     // 18. check compacton sparse extract column
+//     subcolumn.set_name(parent_column.name_lower_case() + ".key3");
+//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key3"));
+//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+// 
+//     // 19. check compaction default column
+//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+// }
+// 
+// TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) {
+//     // 1. create tablet_schema
+//     TabletSchemaPB schema_pb;
+//     schema_pb.set_keys_type(KeysType::DUP_KEYS);
+//     SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", 
"V1", 10);
+//     _tablet_schema = std::make_shared<TabletSchema>();
+//     _tablet_schema->init_from_pb(schema_pb);
+// 
+//     // 2. create tablet
+//     TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+//     tablet_meta->_tablet_id = 10000;
+//     _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
+//     EXPECT_TRUE(_tablet->init().ok());
+//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+//     
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+// 
+//     // 3. create file_writer
+//     io::FileWriterPtr file_writer;
+//     auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+//     auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     // 4. create column_writer
+//     SegmentFooterPB footer;
+//     ColumnWriterOptions opts;
+//     opts.meta = footer.add_columns();
+//     opts.compression_type = CompressionTypePB::LZ4;
+//     opts.file_writer = file_writer.get();
+//     opts.footer = &footer;
+//     RowsetWriterContext rowset_ctx;
+//     rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+//     opts.rowset_ctx = &rowset_ctx;
+//     opts.rowset_ctx->tablet_schema = _tablet_schema;
+//     TabletColumn column = _tablet_schema->column(0);
+//     _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+// 
+//     std::unique_ptr<ColumnWriter> writer;
+//     EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
+//     EXPECT_TRUE(writer->init().ok());
+//     EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+// 
+//     // 5. write data
+//     auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
+//     auto block = _tablet_schema->create_block();
+//     auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
+//     std::unordered_map<int, std::string> inserted_jsonstr;
+//     auto path_with_size = 
VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
+//                                                                             
    &inserted_jsonstr);
+//     olap_data_convertor->add_column_data_convertor(column);
+//     olap_data_convertor->set_source_content(&block, 0, 1000);
+//     auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+//     EXPECT_TRUE(result.ok());
+//     EXPECT_TRUE(accessor != nullptr);
+//     EXPECT_TRUE(writer->append(accessor->get_nullmap(), 
accessor->get_data(), 1000).ok());
+//     st = writer->finish();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_data();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_ordinal_index();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = writer->write_zone_map();
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(file_writer->close().ok());
+//     footer.set_num_rows(1000);
+// 
+//     // 6. check footer
+//     EXPECT_EQ(footer.columns_size(), 12);
+//     auto column_meta = footer.columns(0);
+//     EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+// 
+//     for (int i = 1; i < footer.columns_size() - 1; ++i) {
+//         auto column_meta = footer.columns(i);
+//         check_column_meta(column_meta, path_with_size);
+//     }
+//     check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
+// 
+//     // 7. check variant reader
+//     io::FileReaderSPtr file_reader;
+//     st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     ColumnReaderOptions read_opts;
+//     std::unique_ptr<ColumnReader> column_reader;
+//     st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
+//     EXPECT_TRUE(variant_column_reader != nullptr);
+// 
+//     // 8. check statistics
+//     auto statistics = variant_column_reader->get_stats();
+//     for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+//         std::cout << "path: " << path << ", size: " << size << std::endl;
+//         EXPECT_EQ(path_with_size[path], size);
+//     }
+//     for (const auto& [path, size] : 
statistics->sparse_column_non_null_size) {
+//         std::cout << "sparse path: " << path << ", size: " << size << 
std::endl;
+//         EXPECT_EQ(path_with_size[path], size);
+//     }
+// 
+//     // 9. check root
+//     ColumnIterator* it;
+//     TabletColumn parent_column = _tablet_schema->column(0);
+//     StorageReadOptions storage_read_opts;
+//     storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+//     ColumnIteratorOptions column_iter_opts;
+//     OlapReaderStatistics stats;
+//     column_iter_opts.stats = &stats;
+//     column_iter_opts.file_reader = file_reader.get();
+//     st = it->init(column_iter_opts);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+// 
+//     MutableColumnPtr new_column_object = ColumnObject::create(3);
+//     size_t nrows = 1000;
+//     st = it->seek_to_ordinal(0);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     st = it->next_batch(&nrows, new_column_object);
+//     EXPECT_TRUE(st.ok()) << st.msg();
+//     EXPECT_TRUE(stats.bytes_read > 0);
+// 
+//     for (int i = 0; i < 1000; ++i) {
+//         std::string value;
+//         st = assert_cast<ColumnObject*>(new_column_object.get())
+//                      ->serialize_one_row_to_string(i, &value);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_EQ(value, inserted_jsonstr[i]);
+//     }
+// 
+//     auto read_to_column_object = [&]() {
+//         new_column_object = ColumnObject::create(10);
+//         nrows = 1000;
+//         st = it->seek_to_ordinal(0);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         st = it->next_batch(&nrows, new_column_object);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_TRUE(stats.bytes_read > 0);
+//         EXPECT_EQ(nrows, 1000);
+//     };
+// 
+//     auto check_key_stats = [&](const std::string& key_num) {
+//         std::string key = ".key" + key_num;
+//         TabletColumn subcolumn_in_nested;
+//         subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
+//         subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+//         subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
+//         
subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
+//         subcolumn_in_nested.set_variant_max_subcolumns_count(
+//                 parent_column.variant_max_subcolumns_count());
+//         subcolumn_in_nested.set_is_nullable(true);
+// 
+//         st = variant_column_reader->new_iterator(&it, subcolumn_in_nested, 
&storage_read_opts);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+//         st = it->init(column_iter_opts);
+//         EXPECT_TRUE(st.ok()) << st.msg();
+//         read_to_column_object();
+// 
+//         size_t key_count = 0;
+//         size_t key_nested_count = 0;
+//         for (int row = 0; row < 1000; ++row) {
+//             std::string value;
+//             st = assert_cast<ColumnObject*>(new_column_object.get())
+//                          ->serialize_one_row_to_string(row, &value);
+//             EXPECT_TRUE(st.ok()) << st.msg();
+//             if (value.find("nested" + key_num) != std::string::npos) {
+//                 key_nested_count++;
+//             } else if (value.find("88") != std::string::npos) {
+//                 key_count++;
+//             }
+//         }
+//         EXPECT_EQ(key_count, path_with_size["key" + key_num]);
+//         EXPECT_EQ(key_nested_count, path_with_size["key" + key_num + 
".nested" + key_num]);
+//     };
+// 
+//     for (int i = 3; i < 10; ++i) {
+//         check_key_stats(std::to_string(i));
+//     }
+// 
+//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+// }
+// 
+// } // namespace doris
\ No newline at end of file
diff --git a/be/test/olap/rowset/variant_with_compaction_test.cpp 
b/be/test/olap/rowset/variant_with_compaction_test.cpp
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index d4d50033087..c0137934421 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -2610,15 +2610,19 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf, GsonPostProc
     }
 
     public void setVariantMaxSubcolumnsCount(int maxSubcoumnsCount) {
-        
getOrCreatTableProperty().setVariantMaxSubcolumnsCount(maxSubcoumnsCount);
         List<Column> columns = getBaseSchema(true);
+        boolean hasVariantType = false;
         for (Column column : columns) {
             Type type = column.getType();
             if (type.isVariantType()) {
+                hasVariantType = true;
                 VariantType scType = (VariantType) type;
                 scType.setVariantMaxSubcolumnsCount(maxSubcoumnsCount);
             }
         }
+        if (hasVariantType) {
+            
getOrCreatTableProperty().setVariantMaxSubcolumnsCount(maxSubcoumnsCount);
+        }
     }
 
     public int getVariantMaxSubcolumnsCount() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
index 33efcf950ba..98ac60d5375 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
@@ -396,13 +396,16 @@ public abstract class DataType {
             org.apache.doris.catalog.ArrayType arrayType = 
(org.apache.doris.catalog.ArrayType) type;
             return ArrayType.of(fromCatalogType(arrayType.getItemType()), 
arrayType.getContainsNull());
         } else if (type.isVariantType()) {
-            List<VariantField> variantFields = 
((org.apache.doris.catalog.VariantType) type)
-                    .getPredefinedFields().stream()
-                    .map(cf -> new VariantField(cf.getPattern(), 
fromCatalogType(cf.getType()),
-                            cf.getComment() == null ? "" : cf.getComment(), 
cf.getPatternType().toString()))
-                    .collect(ImmutableList.toImmutableList());
-            return new VariantType(variantFields,
-                            ((org.apache.doris.catalog.VariantType) 
type).getVariantMaxSubcolumnsCount());
+            if (type instanceof org.apache.doris.catalog.VariantType) {
+                List<VariantField> variantFields = 
((org.apache.doris.catalog.VariantType) type)
+                        .getPredefinedFields().stream()
+                        .map(cf -> new VariantField(cf.getPattern(), 
fromCatalogType(cf.getType()),
+                                cf.getComment() == null ? "" : 
cf.getComment(), cf.getPatternType().toString()))
+                        .collect(ImmutableList.toImmutableList());
+                return new VariantType(variantFields,
+                        ((org.apache.doris.catalog.VariantType) 
type).getVariantMaxSubcolumnsCount());
+            }
+            return new VariantType(0);
         } else {
             return UnsupportedType.INSTANCE;
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 140c9235b78..d73708ac19a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2498,7 +2498,7 @@ public class SessionVariable implements Serializable, 
Writable {
             checker = "checkGlobalVariantMaxSubcolumnsCount",
             fuzzy = true
     )
-    public int globalVariantMaxSubcolumnsCount = 5;
+    public int globalVariantMaxSubcolumnsCount = 2048;
 
     public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
         this.enableESParallelScroll = enableESParallelScroll;
diff --git 
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
 
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
index efcecd75953..0097ff185ac 100644
Binary files 
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
 and 
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
 differ
diff --git 
a/regression-test/data/schema_change_p0/test_modify_reorder_column.out 
b/regression-test/data/schema_change_p0/test_modify_reorder_column.out
index ce2b54972c4..9b3a9cbd122 100644
Binary files 
a/regression-test/data/schema_change_p0/test_modify_reorder_column.out and 
b/regression-test/data/schema_change_p0/test_modify_reorder_column.out differ
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index ecbfb38a747..5f0731b29e9 100644
Binary files a/regression-test/data/variant_p0/load.out and 
b/regression-test/data/variant_p0/load.out differ
diff --git a/regression-test/suites/variant_p0/load.groovy 
b/regression-test/suites/variant_p0/load.groovy
index 8661d86983c..e5adb356390 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -101,6 +101,7 @@ suite("regression_test_variant", "p0"){
             qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from  
${table_name} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) 
is not null and   cast(v['b'] as string) != '{}' order by k,cast(v as string) 
desc limit 10000;"
             qt_sql5 "select v['b'] from ${table_name} where cast(v['b'] as 
int) > 0;"
             qt_sql6 "select cast(v['b'] as string) from ${table_name} where  
cast(v['b'] as string) != 'null' and cast(v['b'] as string) is not null and   
cast(v['b'] as string) != '{}' order by k,  cast(v['b'] as string) "
+            qt_sql7 "select * from ${table_name} where v >= 5 order by k limit 
5"
             // verify table_name 
         }
         sql "insert into simple_variant_DUPLICATE select k, cast(v as string) 
from simple_variant_UNIQUE;"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to