This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 23a520dae92 [fix](ut) fix  variant_column_writer_reader_test.cpp 
column_object_test.cpp
23a520dae92 is described below

commit 23a520dae92f192a4f6935f8e8de19d4b2c143ad
Author: Sun Chenyang <suncheny...@selectdb.com>
AuthorDate: Thu Apr 24 23:25:44 2025 +0800

    [fix](ut) fix  variant_column_writer_reader_test.cpp column_object_test.cpp
---
 be/src/olap/rowset/segment_v2/column_reader.h      |    2 +-
 .../variant_column_writer_reader_test.cpp          | 1326 ++++++++---------
 be/test/vec/columns/column_object_test.cpp         | 1568 ++++++++++----------
 3 files changed, 1453 insertions(+), 1443 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.h 
b/be/src/olap/rowset/segment_v2/column_reader.h
index c0c6ca06882..363bc010f16 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -89,7 +89,7 @@ struct ColumnReaderOptions {
 
     int be_exec_version = -1;
 
-    const TabletSchemaSPtr tablet_schema = nullptr;
+    TabletSchemaSPtr tablet_schema = nullptr;
 };
 
 struct ColumnIteratorOptions {
diff --git 
a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 0138d83adb5..d1c20ca2306 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -1,663 +1,663 @@
-// // Licensed to the Apache Software Foundation (ASF) under one
-// // or more contributor license agreements.  See the NOTICE file
-// // distributed with this work for additional information
-// // regarding copyright ownership.  The ASF licenses this file
-// // to you under the Apache License, Version 2.0 (the
-// // "License"); you may not use this file except in compliance
-// // with the License.  You may obtain a copy of the License at
-// //
-// //   http://www.apache.org/licenses/LICENSE-2.0
-// //
-// // Unless required by applicable law or agreed to in writing,
-// // software distributed under the License is distributed on an
-// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// // KIND, either express or implied.  See the License for the
-// // specific language governing permissions and limitations
-// // under the License.
-//
-// #include "gtest/gtest.h"
-// #include "olap/rowset/segment_v2/column_reader.h"
-// #include "olap/rowset/segment_v2/hierarchical_data_reader.h"
-// #include "olap/rowset/segment_v2/variant_column_writer_impl.h"
-// #include "olap/storage_engine.h"
-// #include "testutil/schema_utils.h"
-// #include "testutil/variant_util.h"
-//
-// using namespace doris::vectorized;
-//
-// namespace doris {
-//
-// constexpr static uint32_t MAX_PATH_LEN = 1024;
-// constexpr static std::string_view dest_dir = 
"/ut_dir/variant_column_writer_test";
-// constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
-//
-// class VariantColumnWriterReaderTest : public testing::Test {
-// public:
-//     void SetUp() override {
-//         // absolute dir
-//         char buffer[MAX_PATH_LEN];
-//         EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
-//         _current_dir = std::string(buffer);
-//         _absolute_dir = _current_dir + std::string(dest_dir);
-//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-//         
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
-//
-//         // tmp dir
-//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-//         
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
-//         std::vector<StorePath> paths;
-//         paths.emplace_back(std::string(tmp_dir), 1024000000);
-//         auto tmp_file_dirs = 
std::make_unique<segment_v2::TmpFileDirs>(paths);
-//         Status st = tmp_file_dirs->init();
-//         EXPECT_TRUE(st.ok()) << st.to_json();
-//         ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
-//
-//         // storage engine
-//         doris::EngineOptions options;
-//         auto engine = std::make_unique<StorageEngine>(options);
-//         _engine_ref = engine.get();
-//         _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
-//         static_cast<void>(_data_dir->update_capacity());
-//         ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
-//     }
-//
-//     void TearDown() override {
-//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-//         
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-//         _engine_ref = nullptr;
-//         ExecEnv::GetInstance()->set_storage_engine(nullptr);
-//     }
-//
-//     VariantColumnWriterReaderTest() = default;
-//     ~VariantColumnWriterReaderTest() override = default;
-//
-// private:
-//     TabletSchemaSPtr _tablet_schema = nullptr;
-//     StorageEngine* _engine_ref = nullptr;
-//     std::unique_ptr<DataDir> _data_dir = nullptr;
-//     TabletSharedPtr _tablet = nullptr;
-//     std::string _absolute_dir;
-//     std::string _current_dir;
-// };
-//
-// void check_column_meta(const ColumnMetaPB& column_meta, auto& 
path_with_size) {
-//     EXPECT_TRUE(column_meta.has_column_path_info());
-//     auto path = std::make_shared<vectorized::PathInData>();
-//     path->from_protobuf(column_meta.column_path_info());
-//     EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-//     EXPECT_EQ(column_meta.none_null_size(), 
path_with_size[path->copy_pop_front().get_path()]);
-// }
-//
-// void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& 
path_with_size) {
-//     EXPECT_TRUE(column_meta.has_column_path_info());
-//     auto path = std::make_shared<vectorized::PathInData>();
-//     path->from_protobuf(column_meta.column_path_info());
-//     EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-//     for (const auto& [path, size] :
-//          column_meta.variant_statistics().sparse_column_non_null_size()) {
-//         EXPECT_EQ(size, path_with_size[path]);
-//     }
-//     EXPECT_EQ(path->copy_pop_front().get_path(), 
"__DORIS_VARIANT_SPARSE__");
-// }
-//
-// TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) {
-//     // 1. create tablet_schema
-//     TabletSchemaPB schema_pb;
-//     schema_pb.set_keys_type(KeysType::DUP_KEYS);
-//     SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", 
"V1");
-//     _tablet_schema = std::make_shared<TabletSchema>();
-//     _tablet_schema->init_from_pb(schema_pb);
-//
-//     // 2. create tablet
-//     TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-//     tablet_meta->_tablet_id = 10000;
-//     _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
-//
-//     EXPECT_TRUE(_tablet->init().ok());
-//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-//     
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-//
-//     // 3. create file_writer
-//     io::FileWriterPtr file_writer;
-//     auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-//     auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     // 4. create column_writer
-//     SegmentFooterPB footer;
-//     ColumnWriterOptions opts;
-//     opts.meta = footer.add_columns();
-//     opts.compression_type = CompressionTypePB::LZ4;
-//     opts.file_writer = file_writer.get();
-//     opts.footer = &footer;
-//     RowsetWriterContext rowset_ctx;
-//     rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-//     opts.rowset_ctx = &rowset_ctx;
-//     opts.rowset_ctx->tablet_schema = _tablet_schema;
-//     TabletColumn column = _tablet_schema->column(0);
-//     _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-//
-//     std::unique_ptr<ColumnWriter> writer;
-//     EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
-//     EXPECT_TRUE(writer->init().ok());
-//     EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-//
-//     // 5. write data
-//     auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
-//     auto block = _tablet_schema->create_block();
-//     auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
-//     std::unordered_map<int, std::string> inserted_jsonstr;
-//     auto path_with_size =
-//             VariantUtil::fill_object_column_with_test_data(column_object, 
1000, &inserted_jsonstr);
-//     olap_data_convertor->add_column_data_convertor(column);
-//     olap_data_convertor->set_source_content(&block, 0, 1000);
-//     auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-//     EXPECT_TRUE(result.ok());
-//     EXPECT_TRUE(accessor != nullptr);
-//     EXPECT_TRUE(writer->append(accessor->get_nullmap(), 
accessor->get_data(), 1000).ok());
-//     st = writer->finish();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_data();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_ordinal_index();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_zone_map();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(file_writer->close().ok());
-//     footer.set_num_rows(1000);
-//
-//     // 6. check footer
-//     EXPECT_EQ(footer.columns_size(), 5);
-//     auto column_meta = footer.columns(0);
-//     EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-//
-//     for (int i = 1; i < footer.columns_size() - 1; ++i) {
-//         auto column_meta = footer.columns(i);
-//         check_column_meta(column_meta, path_with_size);
-//     }
-//     check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
-//
-//     // 7. check variant reader
-//     io::FileReaderSPtr file_reader;
-//     st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     ColumnReaderOptions read_opts;
-//     std::unique_ptr<ColumnReader> column_reader;
-//     st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
-//     EXPECT_TRUE(variant_column_reader != nullptr);
-//
-//     auto subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key0"));
-//     EXPECT_TRUE(subcolumn_reader != nullptr);
-//     subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key1"));
-//     EXPECT_TRUE(subcolumn_reader != nullptr);
-//     subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key2"));
-//     EXPECT_TRUE(subcolumn_reader != nullptr);
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
-//     
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
-//     auto size = variant_column_reader->get_metadata_size();
-//     EXPECT_GT(size, 0);
-//
-//     // 8. check statistics
-//     auto statistics = variant_column_reader->get_stats();
-//     for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-//         EXPECT_EQ(path_with_size[path], size);
-//     }
-//     for (const auto& [path, size] : 
statistics->sparse_column_non_null_size) {
-//         EXPECT_EQ(path_with_size[path], size);
-//     }
-//
-//     // 9. check hier reader
-//     ColumnIterator* it;
-//     TabletColumn parent_column = _tablet_schema->column(0);
-//     StorageReadOptions storage_read_opts;
-//     storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-//     ColumnIteratorOptions column_iter_opts;
-//     OlapReaderStatistics stats;
-//     column_iter_opts.stats = &stats;
-//     column_iter_opts.file_reader = file_reader.get();
-//     st = it->init(column_iter_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     MutableColumnPtr new_column_object = ColumnObject::create(3);
-//     size_t nrows = 1000;
-//     st = it->seek_to_ordinal(0);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = it->next_batch(&nrows, new_column_object);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(stats.bytes_read > 0);
-//
-//     for (int i = 0; i < 1000; ++i) {
-//         std::string value;
-//         st = assert_cast<ColumnObject*>(new_column_object.get())
-//                      ->serialize_one_row_to_string(i, &value);
-//
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_EQ(value, inserted_jsonstr[i]);
-//     }
-//
-//     std::vector<rowid_t> row_ids;
-//     for (int i = 0; i < 1000; ++i) {
-//         if (i % 7 == 0) {
-//             row_ids.push_back(i);
-//         }
-//     }
-//     new_column_object = ColumnObject::create(3);
-//     st = it->read_by_rowids(row_ids.data(), row_ids.size(), 
new_column_object);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     for (int i = 0; i < row_ids.size(); ++i) {
-//         std::string value;
-//         st = assert_cast<ColumnObject*>(new_column_object.get())
-//                      ->serialize_one_row_to_string(i, &value);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
-//     }
-//
-//     auto read_to_column_object = [&]() {
-//         new_column_object = ColumnObject::create(3);
-//         nrows = 1000;
-//         st = it->seek_to_ordinal(0);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         st = it->next_batch(&nrows, new_column_object);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_TRUE(stats.bytes_read > 0);
-//         EXPECT_EQ(nrows, 1000);
-//     };
-//
-//     // 10. check sparse extract reader
-//     for (int i = 3; i < 10; ++i) {
-//         std::string key = ".key" + std::to_string(i);
-//         TabletColumn subcolumn_in_sparse;
-//         subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
-//         subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-//         subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
-//         
subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
-//         subcolumn_in_sparse.set_variant_max_subcolumns_count(
-//                 parent_column.variant_max_subcolumns_count());
-//         subcolumn_in_sparse.set_is_nullable(true);
-//
-//         st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse, 
&storage_read_opts);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-//         st = it->init(column_iter_opts);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//
-//         read_to_column_object();
-//
-//         for (int row = 0; row < 1000; ++row) {
-//             std::string value;
-//             st = assert_cast<ColumnObject*>(new_column_object.get())
-//                          ->serialize_one_row_to_string(row, &value);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             if (inserted_jsonstr[row].find(key) != std::string::npos) {
-//                 if (i % 2 == 0) {
-//                     EXPECT_EQ(value, "88");
-//                 } else {
-//                     EXPECT_EQ(value, "str99");
-//                 }
-//             }
-//         }
-//     }
-//
-//     // 11. check leaf reader
-//     auto check_leaf_reader = [&]() {
-//         for (int i = 0; i < 3; ++i) {
-//             std::string key = ".key" + std::to_string(i);
-//             TabletColumn subcolumn;
-//             subcolumn.set_name(parent_column.name_lower_case() + key);
-//             subcolumn.set_type((FieldType)(int)footer.columns(i + 
1).type());
-//             subcolumn.set_parent_unique_id(parent_column.unique_id());
-//             
subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + key));
-//             subcolumn.set_variant_max_subcolumns_count(
-//                     parent_column.variant_max_subcolumns_count());
-//             subcolumn.set_is_nullable(true);
-//
-//             st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
-//             st = it->init(column_iter_opts);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//
-//             auto column_type = 
DataTypeFactory::instance().create_data_type(subcolumn, false);
-//             auto read_column = column_type->create_column();
-//             nrows = 1000;
-//             st = it->seek_to_ordinal(0);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             st = it->next_batch(&nrows, read_column);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             EXPECT_TRUE(stats.bytes_read > 0);
-//
-//             for (int row = 0; row < 1000; ++row) {
-//                 const std::string& value = 
column_type->to_string(*read_column, row);
-//                 if (inserted_jsonstr[row].find(key) != std::string::npos) {
-//                     if (i % 2 == 0) {
-//                         EXPECT_EQ(value, "88");
-//                     } else {
-//                         EXPECT_EQ(value, "str99");
-//                     }
-//                 }
-//             }
-//         }
-//     };
-//     check_leaf_reader();
-//
-//     // 12. check empty
-//     TabletColumn subcolumn;
-//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-//     subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-//     subcolumn.set_parent_unique_id(parent_column.unique_id());
-//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-//     subcolumn.set_is_nullable(true);
-//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-//
-//     // 13. check statistics size == limit
-//     auto& variant_stats = variant_column_reader->_statistics;
-//     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
-//                 config::variant_max_sparse_column_statistics_size);
-//     auto limit = config::variant_max_sparse_column_statistics_size -
-//                  variant_stats->sparse_column_non_null_size.size();
-//     for (int i = 0; i < limit; ++i) {
-//         std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
-//         variant_stats->sparse_column_non_null_size[key] = 10000;
-//     }
-//     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
-//                 config::variant_max_sparse_column_statistics_size);
-//
-//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-//     st = it->init(column_iter_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     auto check_empty_column = [&]() {
-//         for (int row = 0; row < 1000; ++row) {
-//             std::string value;
-//             st = assert_cast<ColumnObject*>(new_column_object.get())
-//                          ->serialize_one_row_to_string(row, &value);
-//
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             EXPECT_EQ(value, "{}");
-//         }
-//     };
-//
-//     read_to_column_object();
-//     check_empty_column();
-//
-//     // construct tablet schema for compaction
-//     storage_read_opts.io_ctx.reader_type = 
ReaderType::READER_BASE_COMPACTION;
-//     storage_read_opts.tablet_schema = _tablet_schema;
-//     std::unordered_map<int32_t, TabletSchema::PathsSetInfo> 
uid_to_paths_set_info;
-//     TabletSchema::PathsSetInfo paths_set_info;
-//     paths_set_info.sub_path_set.insert("key0");
-//     paths_set_info.sub_path_set.insert("key3");
-//     paths_set_info.sub_path_set.insert("key4");
-//     paths_set_info.sparse_path_set.insert("key1");
-//     paths_set_info.sparse_path_set.insert("key2");
-//     paths_set_info.sparse_path_set.insert("key5");
-//     paths_set_info.sparse_path_set.insert("key6");
-//     paths_set_info.sparse_path_set.insert("key7");
-//     paths_set_info.sparse_path_set.insert("key8");
-//     paths_set_info.sparse_path_set.insert("key9");
-//     uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
-//     _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
-//
-//     // 14. check compaction subcolumn reader
-//     check_leaf_reader();
-//
-//     // 15. check compaction root reader
-//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
-//     st = it->init(column_iter_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     // 16. check compacton sparse column
-//     TabletColumn sparse_column = 
schema_util::create_sparse_column(parent_column);
-//     st = variant_column_reader->new_iterator(&it, sparse_column, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
-//     st = it->init(column_iter_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     auto column_type = 
DataTypeFactory::instance().create_data_type(sparse_column, false);
-//     auto read_column = column_type->create_column();
-//     nrows = 1000;
-//     st = it->seek_to_ordinal(0);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = it->next_batch(&nrows, read_column);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(stats.bytes_read > 0);
-//
-//     for (int row = 0; row < 1000; ++row) {
-//         const std::string& value = column_type->to_string(*read_column, 
row);
-//         EXPECT_TRUE(value.find("key0") == std::string::npos)
-//                 << "row: " << row << ", value: " << value;
-//         EXPECT_TRUE(value.find("key3") == std::string::npos)
-//                 << "row: " << row << ", value: " << value;
-//         EXPECT_TRUE(value.find("key4") == std::string::npos)
-//                 << "row: " << row << ", value: " << value;
-//     }
-//
-//     // 17. check limit = 10000
-//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-//
-//     for (int i = 0; i < limit; ++i) {
-//         std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
-//         variant_stats->sparse_column_non_null_size.erase(key);
-//     }
-//
-//     // 18. check compacton sparse extract column
-//     subcolumn.set_name(parent_column.name_lower_case() + ".key3");
-//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key3"));
-//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-//
-//     // 19. check compaction default column
-//     subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-//     subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
-//     st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-// }
-//
-// TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) {
-//     // 1. create tablet_schema
-//     TabletSchemaPB schema_pb;
-//     schema_pb.set_keys_type(KeysType::DUP_KEYS);
-//     SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", 
"V1", 10);
-//     _tablet_schema = std::make_shared<TabletSchema>();
-//     _tablet_schema->init_from_pb(schema_pb);
-//
-//     // 2. create tablet
-//     TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-//     tablet_meta->_tablet_id = 10000;
-//     _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
-//     EXPECT_TRUE(_tablet->init().ok());
-//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-//     
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-//
-//     // 3. create file_writer
-//     io::FileWriterPtr file_writer;
-//     auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-//     auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     // 4. create column_writer
-//     SegmentFooterPB footer;
-//     ColumnWriterOptions opts;
-//     opts.meta = footer.add_columns();
-//     opts.compression_type = CompressionTypePB::LZ4;
-//     opts.file_writer = file_writer.get();
-//     opts.footer = &footer;
-//     RowsetWriterContext rowset_ctx;
-//     rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-//     opts.rowset_ctx = &rowset_ctx;
-//     opts.rowset_ctx->tablet_schema = _tablet_schema;
-//     TabletColumn column = _tablet_schema->column(0);
-//     _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-//
-//     std::unique_ptr<ColumnWriter> writer;
-//     EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
-//     EXPECT_TRUE(writer->init().ok());
-//     EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-//
-//     // 5. write data
-//     auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
-//     auto block = _tablet_schema->create_block();
-//     auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
-//     std::unordered_map<int, std::string> inserted_jsonstr;
-//     auto path_with_size = 
VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
-//                                                                             
    &inserted_jsonstr);
-//     olap_data_convertor->add_column_data_convertor(column);
-//     olap_data_convertor->set_source_content(&block, 0, 1000);
-//     auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-//     EXPECT_TRUE(result.ok());
-//     EXPECT_TRUE(accessor != nullptr);
-//     EXPECT_TRUE(writer->append(accessor->get_nullmap(), 
accessor->get_data(), 1000).ok());
-//     st = writer->finish();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_data();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_ordinal_index();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = writer->write_zone_map();
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(file_writer->close().ok());
-//     footer.set_num_rows(1000);
-//
-//     // 6. check footer
-//     EXPECT_EQ(footer.columns_size(), 12);
-//     auto column_meta = footer.columns(0);
-//     EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-//
-//     for (int i = 1; i < footer.columns_size() - 1; ++i) {
-//         auto column_meta = footer.columns(i);
-//         check_column_meta(column_meta, path_with_size);
-//     }
-//     check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
-//
-//     // 7. check variant reader
-//     io::FileReaderSPtr file_reader;
-//     st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     ColumnReaderOptions read_opts;
-//     std::unique_ptr<ColumnReader> column_reader;
-//     st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
-//     EXPECT_TRUE(variant_column_reader != nullptr);
-//
-//     // 8. check statistics
-//     auto statistics = variant_column_reader->get_stats();
-//     for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-//         std::cout << "path: " << path << ", size: " << size << std::endl;
-//         EXPECT_EQ(path_with_size[path], size);
-//     }
-//     for (const auto& [path, size] : 
statistics->sparse_column_non_null_size) {
-//         std::cout << "sparse path: " << path << ", size: " << size << 
std::endl;
-//         EXPECT_EQ(path_with_size[path], size);
-//     }
-//
-//     // 9. check root
-//     ColumnIterator* it;
-//     TabletColumn parent_column = _tablet_schema->column(0);
-//     StorageReadOptions storage_read_opts;
-//     storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-//     st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-//     ColumnIteratorOptions column_iter_opts;
-//     OlapReaderStatistics stats;
-//     column_iter_opts.stats = &stats;
-//     column_iter_opts.file_reader = file_reader.get();
-//     st = it->init(column_iter_opts);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//
-//     MutableColumnPtr new_column_object = ColumnObject::create(3);
-//     size_t nrows = 1000;
-//     st = it->seek_to_ordinal(0);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     st = it->next_batch(&nrows, new_column_object);
-//     EXPECT_TRUE(st.ok()) << st.msg();
-//     EXPECT_TRUE(stats.bytes_read > 0);
-//
-//     for (int i = 0; i < 1000; ++i) {
-//         std::string value;
-//         st = assert_cast<ColumnObject*>(new_column_object.get())
-//                      ->serialize_one_row_to_string(i, &value);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_EQ(value, inserted_jsonstr[i]);
-//     }
-//
-//     auto read_to_column_object = [&]() {
-//         new_column_object = ColumnObject::create(10);
-//         nrows = 1000;
-//         st = it->seek_to_ordinal(0);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         st = it->next_batch(&nrows, new_column_object);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_TRUE(stats.bytes_read > 0);
-//         EXPECT_EQ(nrows, 1000);
-//     };
-//
-//     auto check_key_stats = [&](const std::string& key_num) {
-//         std::string key = ".key" + key_num;
-//         TabletColumn subcolumn_in_nested;
-//         subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
-//         subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-//         subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
-//         
subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
-//         subcolumn_in_nested.set_variant_max_subcolumns_count(
-//                 parent_column.variant_max_subcolumns_count());
-//         subcolumn_in_nested.set_is_nullable(true);
-//
-//         st = variant_column_reader->new_iterator(&it, subcolumn_in_nested, 
&storage_read_opts);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-//         st = it->init(column_iter_opts);
-//         EXPECT_TRUE(st.ok()) << st.msg();
-//         read_to_column_object();
-//
-//         size_t key_count = 0;
-//         size_t key_nested_count = 0;
-//         for (int row = 0; row < 1000; ++row) {
-//             std::string value;
-//             st = assert_cast<ColumnObject*>(new_column_object.get())
-//                          ->serialize_one_row_to_string(row, &value);
-//             EXPECT_TRUE(st.ok()) << st.msg();
-//             if (value.find("nested" + key_num) != std::string::npos) {
-//                 key_nested_count++;
-//             } else if (value.find("88") != std::string::npos) {
-//                 key_count++;
-//             }
-//         }
-//         EXPECT_EQ(key_count, path_with_size["key" + key_num]);
-//         EXPECT_EQ(key_nested_count, path_with_size["key" + key_num + 
".nested" + key_num]);
-//     };
-//
-//     for (int i = 3; i < 10; ++i) {
-//         check_key_stats(std::to_string(i));
-//     }
-//
-//     
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-// }
-//
-// } // namespace doris
\ No newline at end of file
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+#include "olap/rowset/segment_v2/variant_column_writer_impl.h"
+#include "olap/storage_engine.h"
+#include "testutil/schema_utils.h"
+#include "testutil/variant_util.h"
+
+using namespace doris::vectorized;
+
+namespace doris {
+
+constexpr static uint32_t MAX_PATH_LEN = 1024;
+constexpr static std::string_view dest_dir = 
"/ut_dir/variant_column_writer_test";
+constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
+
+class VariantColumnWriterReaderTest : public testing::Test { // gtest fixture: per-test directories, TmpFileDirs and a StorageEngine registered with ExecEnv
+public:
+    void SetUp() override {
+        // absolute dir: <cwd> + dest_dir, deleted and recreated so every test starts empty
+        char buffer[MAX_PATH_LEN];
+        EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+        _current_dir = std::string(buffer);
+        _absolute_dir = _current_dir + std::string(dest_dir);
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
+
+        // tmp dir: recreate tmp_dir and hand it to ExecEnv as the only TmpFileDirs store path
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
+        std::vector<StorePath> paths;
+        paths.emplace_back(std::string(tmp_dir), 1024000000);
+        auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
+        Status st = tmp_file_dirs->init();
+        EXPECT_TRUE(st.ok()) << st.to_json();
+        ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+
+        // storage engine: ownership moves to ExecEnv; _engine_ref stays behind as a non-owning pointer
+        doris::EngineOptions options;
+        auto engine = std::make_unique<StorageEngine>(options);
+        _engine_ref = engine.get();
+        _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
+        static_cast<void>(_data_dir->update_capacity()); // result deliberately ignored
+        ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
+    }
+
+    void TearDown() override { // removes both directories, then unregisters the engine set up in SetUp
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+        
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+        _engine_ref = nullptr; // NOTE(review): _data_dir/_tablet were built from *_engine_ref and are still alive here - confirm their destructors never touch the engine
+        ExecEnv::GetInstance()->set_storage_engine(nullptr);
+    }
+
+    VariantColumnWriterReaderTest() = default;
+    ~VariantColumnWriterReaderTest() override = default;
+
+private: // NOTE(review): TEST_F bodies use these members directly, which needs relaxed access checks in the test build (e.g. -fno-access-control) - confirm
+    TabletSchemaSPtr _tablet_schema = nullptr; // assigned by each test
+    StorageEngine* _engine_ref = nullptr; // non-owning; the engine itself is handed to ExecEnv in SetUp
+    std::unique_ptr<DataDir> _data_dir = nullptr; // constructed on _absolute_dir in SetUp
+    TabletSharedPtr _tablet = nullptr; // assigned by each test
+    std::string _absolute_dir; // _current_dir + dest_dir
+    std::string _current_dir; // working directory captured by getcwd in SetUp
+};
+
+// Checks the footer meta of one materialized variant subcolumn:
+// - path info is present and names parent column unique id 1;
+// - the meta's none_null_size() equals the entry of path_with_size keyed by
+//   copy_pop_front().get_path() of the column path.
+// path_with_size is subscripted with operator[], so it has to be a mutable map-like object.
+void check_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
+    EXPECT_TRUE(column_meta.has_column_path_info());
+    const auto& path_info_pb = column_meta.column_path_info();
+
+    vectorized::PathInData column_path;
+    column_path.from_protobuf(path_info_pb);
+
+    EXPECT_EQ(path_info_pb.parrent_column_unique_id(), 1);
+    EXPECT_EQ(column_meta.none_null_size(),
+              path_with_size[column_path.copy_pop_front().get_path()]);
+}
+
+// Checks the footer meta of the sparse column written for a variant column:
+// - path info is present and names parent column unique id 1;
+// - every per-path count in variant_statistics().sparse_column_non_null_size() equals
+//   path_with_size[<that path>];
+// - copy_pop_front().get_path() of the column path is "__DORIS_VARIANT_SPARSE__".
+// path_with_size is subscripted with operator[], so it has to be a mutable map-like object.
+void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
+    EXPECT_TRUE(column_meta.has_column_path_info());
+    vectorized::PathInData column_path;
+    column_path.from_protobuf(column_meta.column_path_info());
+    EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
+    // Binding names are kept distinct from column_path so the loop does not hide it.
+    for (const auto& [sparse_path, non_null_size] :
+         column_meta.variant_statistics().sparse_column_non_null_size()) {
+        EXPECT_EQ(non_null_size, path_with_size[sparse_path]);
+    }
+    EXPECT_EQ(column_path.copy_pop_front().get_path(), "__DORIS_VARIANT_SPARSE__");
+}
+
+TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) { // writes 1000 variant rows through ColumnWriter, then checks footer metas, statistics and every iterator kind new_iterator hands back
+    // 1. create tablet_schema: DUP_KEYS schema with a single VARIANT column "V1"
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1");
+    _tablet_schema = std::make_shared<TabletSchema>();
+    _tablet_schema->init_from_pb(schema_pb);
+
+    // 2. create tablet (id 10000) on the fixture's data dir and start from an empty tablet directory
+    TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+    tablet_meta->_tablet_id = 10000;
+    _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
+
+    EXPECT_TRUE(_tablet->init().ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    // 3. create file_writer for the path local_segment_path builds inside the tablet directory
+    io::FileWriterPtr file_writer;
+    auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+    auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    // 4. create column_writer: LZ4 writer for column 0 whose column meta is the first entry of `footer`
+    SegmentFooterPB footer;
+    ColumnWriterOptions opts;
+    opts.meta = footer.add_columns();
+    opts.compression_type = CompressionTypePB::LZ4;
+    opts.file_writer = file_writer.get();
+    opts.footer = &footer;
+    RowsetWriterContext rowset_ctx;
+    rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+    opts.rowset_ctx = &rowset_ctx;
+    opts.rowset_ctx->tablet_schema = _tablet_schema;
+    TabletColumn column = _tablet_schema->column(0);
+    _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+
+    std::unique_ptr<ColumnWriter> writer;
+    EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
+    EXPECT_TRUE(writer->init().ok());
+    EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+
+    // 5. write data: fill 1000 generated rows, convert the block, append, then flush data, ordinal index and zone map
+    auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
+    auto block = _tablet_schema->create_block();
+    auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
+    std::unordered_map<int, std::string> inserted_jsonstr;
+    auto path_with_size =
+            VariantUtil::fill_object_column_with_test_data(column_object, 
1000, &inserted_jsonstr);
+    olap_data_convertor->add_column_data_convertor(column);
+    olap_data_convertor->set_source_content(&block, 0, 1000);
+    auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+    EXPECT_TRUE(result.ok());
+    EXPECT_TRUE(accessor != nullptr);
+    EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 
1000).ok());
+    st = writer->finish();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_data();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_ordinal_index();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_zone_map();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(file_writer->close().ok());
+    footer.set_num_rows(1000);
+
+    // 6. check footer: 5 metas - the VARIANT root first, the sparse column last, subcolumns in between
+    EXPECT_EQ(footer.columns_size(), 5);
+    auto column_meta = footer.columns(0);
+    EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    for (int i = 1; i < footer.columns_size() - 1; ++i) {
+        auto column_meta = footer.columns(i);
+        check_column_meta(column_meta, path_with_size);
+    }
+    check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
+
+    // 7. check variant reader: key0-key2 have their own readers, key3-key9 are reported as living in the sparse column
+    io::FileReaderSPtr file_reader;
+    st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    ColumnReaderOptions read_opts;
+    read_opts.tablet_schema = _tablet_schema;
+    std::unique_ptr<ColumnReader> column_reader;
+    st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
+    EXPECT_TRUE(variant_column_reader != nullptr);
+
+    auto subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key0"));
+    EXPECT_TRUE(subcolumn_reader != nullptr);
+    subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key1"));
+    EXPECT_TRUE(subcolumn_reader != nullptr);
+    subcolumn_reader = 
variant_column_reader->get_reader_by_path(PathInData("key2"));
+    EXPECT_TRUE(subcolumn_reader != nullptr);
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
+    
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
+    auto size = variant_column_reader->get_metadata_size();
+    EXPECT_GT(size, 0);
+
+    // 8. check statistics: every non-null count the reader reports equals the count returned by the data generator
+    auto statistics = variant_column_reader->get_stats();
+    for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+        EXPECT_EQ(path_with_size[path], size);
+    }
+    for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
+        EXPECT_EQ(path_with_size[path], size);
+    }
+
+    // 9. check hier reader: a query on the root column gets a HierarchicalDataReader that reproduces every inserted JSON row
+    ColumnIterator* it; // NOTE(review): reassigned by every new_iterator call below and never deleted in this test - confirm who owns the returned iterator
+    TabletColumn parent_column = _tablet_schema->column(0);
+    StorageReadOptions storage_read_opts;
+    storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+    ColumnIteratorOptions column_iter_opts;
+    OlapReaderStatistics stats;
+    column_iter_opts.stats = &stats;
+    column_iter_opts.file_reader = file_reader.get();
+    st = it->init(column_iter_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    MutableColumnPtr new_column_object = ColumnObject::create(3);
+    size_t nrows = 1000;
+    st = it->seek_to_ordinal(0);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = it->next_batch(&nrows, new_column_object);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(stats.bytes_read > 0);
+
+    for (int i = 0; i < 1000; ++i) {
+        std::string value;
+        st = assert_cast<ColumnObject*>(new_column_object.get())
+                     ->serialize_one_row_to_string(i, &value);
+
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_EQ(value, inserted_jsonstr[i]);
+    }
+
+    std::vector<rowid_t> row_ids; // every 7th row id, used to exercise read_by_rowids on the same iterator
+    for (int i = 0; i < 1000; ++i) {
+        if (i % 7 == 0) {
+            row_ids.push_back(i);
+        }
+    }
+    new_column_object = ColumnObject::create(3);
+    st = it->read_by_rowids(row_ids.data(), row_ids.size(), new_column_object);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    for (int i = 0; i < row_ids.size(); ++i) {
+        std::string value;
+        st = assert_cast<ColumnObject*>(new_column_object.get())
+                     ->serialize_one_row_to_string(i, &value);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
+    }
+
+    auto read_to_column_object = [&]() { // re-reads rows 0..999 from whatever `it` currently points to into a fresh new_column_object
+        new_column_object = ColumnObject::create(3);
+        nrows = 1000;
+        st = it->seek_to_ordinal(0);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        st = it->next_batch(&nrows, new_column_object);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_TRUE(stats.bytes_read > 0);
+        EXPECT_EQ(nrows, 1000);
+    };
+
+    // 10. check sparse extract reader: key3-key9 get a SparseColumnExtractReader; even keys are expected to read 88, odd keys "str99"
+    for (int i = 3; i < 10; ++i) {
+        std::string key = ".key" + std::to_string(i);
+        TabletColumn subcolumn_in_sparse;
+        subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
+        subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+        subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
+        
subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
+        subcolumn_in_sparse.set_variant_max_subcolumns_count(
+                parent_column.variant_max_subcolumns_count());
+        subcolumn_in_sparse.set_is_nullable(true);
+
+        st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse, 
&storage_read_opts);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+        st = it->init(column_iter_opts);
+        EXPECT_TRUE(st.ok()) << st.msg();
+
+        read_to_column_object();
+
+        for (int row = 0; row < 1000; ++row) {
+            std::string value;
+            st = assert_cast<ColumnObject*>(new_column_object.get())
+                         ->serialize_one_row_to_string(row, &value);
+            EXPECT_TRUE(st.ok()) << st.msg();
+            if (inserted_jsonstr[row].find(key) != std::string::npos) { // NOTE(review): `key` starts with '.', so this only matches if the inserted JSON text contains ".keyN" - confirm the value checks below are not vacuous
+                if (i % 2 == 0) {
+                    EXPECT_EQ(value, "88");
+                } else {
+                    EXPECT_EQ(value, "str99");
+                }
+            }
+        }
+    }
+
+    // 11. check leaf reader: key0-key2 are read through FileColumnIterator using the field type recorded in footer columns 1-3
+    auto check_leaf_reader = [&]() {
+        for (int i = 0; i < 3; ++i) {
+            std::string key = ".key" + std::to_string(i);
+            TabletColumn subcolumn;
+            subcolumn.set_name(parent_column.name_lower_case() + key);
+            subcolumn.set_type((FieldType)(int)footer.columns(i + 1).type());
+            subcolumn.set_parent_unique_id(parent_column.unique_id());
+            subcolumn.set_path_info(PathInData(parent_column.name_lower_case() 
+ key));
+            subcolumn.set_variant_max_subcolumns_count(
+                    parent_column.variant_max_subcolumns_count());
+            subcolumn.set_is_nullable(true);
+
+            st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+            EXPECT_TRUE(st.ok()) << st.msg();
+            EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
+            st = it->init(column_iter_opts);
+            EXPECT_TRUE(st.ok()) << st.msg();
+
+            auto column_type = 
DataTypeFactory::instance().create_data_type(subcolumn, false);
+            auto read_column = column_type->create_column();
+            nrows = 1000;
+            st = it->seek_to_ordinal(0);
+            EXPECT_TRUE(st.ok()) << st.msg();
+            st = it->next_batch(&nrows, read_column);
+            EXPECT_TRUE(st.ok()) << st.msg();
+            EXPECT_TRUE(stats.bytes_read > 0);
+
+            for (int row = 0; row < 1000; ++row) {
+                const std::string& value = 
column_type->to_string(*read_column, row);
+                if (inserted_jsonstr[row].find(key) != std::string::npos) { // NOTE(review): same ".keyN" substring guard as in step 10 - confirm it can actually match
+                    if (i % 2 == 0) {
+                        EXPECT_EQ(value, "88");
+                    } else {
+                        EXPECT_EQ(value, "str99");
+                    }
+                }
+            }
+        }
+    };
+    check_leaf_reader();
+
+    // 12. check empty: a path that was never written (key10) gets a DefaultValueColumnIterator
+    TabletColumn subcolumn;
+    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+    subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    subcolumn.set_parent_unique_id(parent_column.unique_id());
+    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+    subcolumn.set_is_nullable(true);
+    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+
+    // 13. check statistics size == limit: pad the sparse statistics up to config::variant_max_sparse_column_statistics_size; key10 then gets a HierarchicalDataReader that yields "{}" rows
+    auto& variant_stats = variant_column_reader->_statistics;
+    EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
+                config::variant_max_sparse_column_statistics_size);
+    auto limit = config::variant_max_sparse_column_statistics_size -
+                 variant_stats->sparse_column_non_null_size.size();
+    for (int i = 0; i < limit; ++i) {
+        std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
+        variant_stats->sparse_column_non_null_size[key] = 10000;
+    }
+    EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
+                config::variant_max_sparse_column_statistics_size);
+
+    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+    st = it->init(column_iter_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    auto check_empty_column = [&]() { // expects each of the 1000 rows in new_column_object to serialize as "{}"
+        for (int row = 0; row < 1000; ++row) {
+            std::string value;
+            st = assert_cast<ColumnObject*>(new_column_object.get())
+                         ->serialize_one_row_to_string(row, &value);
+
+            EXPECT_TRUE(st.ok()) << st.msg();
+            EXPECT_EQ(value, "{}");
+        }
+    };
+
+    read_to_column_object();
+    check_empty_column();
+
+    // construct tablet schema for compaction: switch to READER_BASE_COMPACTION and declare key0/key3/key4 as subcolumn paths, the remaining keys as sparse paths
+    storage_read_opts.io_ctx.reader_type = ReaderType::READER_BASE_COMPACTION;
+    storage_read_opts.tablet_schema = _tablet_schema;
+    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> 
uid_to_paths_set_info;
+    TabletSchema::PathsSetInfo paths_set_info;
+    paths_set_info.sub_path_set.insert("key0");
+    paths_set_info.sub_path_set.insert("key3");
+    paths_set_info.sub_path_set.insert("key4");
+    paths_set_info.sparse_path_set.insert("key1");
+    paths_set_info.sparse_path_set.insert("key2");
+    paths_set_info.sparse_path_set.insert("key5");
+    paths_set_info.sparse_path_set.insert("key6");
+    paths_set_info.sparse_path_set.insert("key7");
+    paths_set_info.sparse_path_set.insert("key8");
+    paths_set_info.sparse_path_set.insert("key9");
+    uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
+    _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
+
+    // 14. check compaction subcolumn reader: rerun the key0-key2 leaf checks with the compaction read options
+    check_leaf_reader();
+
+    // 15. check compaction root reader: the root column now gets a VariantRootColumnIterator
+    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
+    st = it->init(column_iter_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    // 16. check compaction sparse column: SparseColumnMergeReader output must not mention the paths declared as subcolumns (key0/key3/key4)
+    TabletColumn sparse_column = 
schema_util::create_sparse_column(parent_column);
+    st = variant_column_reader->new_iterator(&it, sparse_column, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
+    st = it->init(column_iter_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    auto column_type = 
DataTypeFactory::instance().create_data_type(sparse_column, false);
+    auto read_column = column_type->create_column();
+    nrows = 1000;
+    st = it->seek_to_ordinal(0);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = it->next_batch(&nrows, read_column);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(stats.bytes_read > 0);
+
+    for (int row = 0; row < 1000; ++row) {
+        const std::string& value = column_type->to_string(*read_column, row);
+        EXPECT_TRUE(value.find("key0") == std::string::npos)
+                << "row: " << row << ", value: " << value;
+        EXPECT_TRUE(value.find("key3") == std::string::npos)
+                << "row: " << row << ", value: " << value;
+        EXPECT_TRUE(value.find("key4") == std::string::npos)
+                << "row: " << row << ", value: " << value;
+    }
+
+    // 17. check limit = 10000: with the statistics still padded to the limit, key10 gets a SparseColumnExtractReader; the padding entries are erased afterwards
+    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+
+    for (int i = 0; i < limit; ++i) {
+        std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
+        variant_stats->sparse_column_non_null_size.erase(key);
+    }
+
+    // 18. check compaction sparse extract column: key3 gets a SparseColumnExtractReader under the compaction read options
+    subcolumn.set_name(parent_column.name_lower_case() + ".key3");
+    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key3"));
+    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+
+    // 19. check compaction default column: with the padding gone, key10 gets a DefaultValueColumnIterator again
+    subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+    subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + 
".key10"));
+    st = variant_column_reader->new_iterator(&it, subcolumn, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+}
+
+TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) { // same write/read round trip, but with nested test data; checks the root reader and per-key hierarchical reads
+    // 1. create tablet_schema: single VARIANT column "V1"; the extra argument 10 is presumably the variant max-subcolumns count - TODO confirm
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1", 
10);
+    _tablet_schema = std::make_shared<TabletSchema>();
+    _tablet_schema->init_from_pb(schema_pb);
+
+    // 2. create tablet (id 10000) on the fixture's data dir and start from an empty tablet directory
+    TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+    tablet_meta->_tablet_id = 10000;
+    _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
+    EXPECT_TRUE(_tablet->init().ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    // 3. create file_writer for the path local_segment_path builds inside the tablet directory
+    io::FileWriterPtr file_writer;
+    auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+    auto st = io::global_local_filesystem()->create_file(file_path, 
&file_writer);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    // 4. create column_writer: LZ4 writer for column 0 whose column meta is the first entry of `footer`
+    SegmentFooterPB footer;
+    ColumnWriterOptions opts;
+    opts.meta = footer.add_columns();
+    opts.compression_type = CompressionTypePB::LZ4;
+    opts.file_writer = file_writer.get();
+    opts.footer = &footer;
+    RowsetWriterContext rowset_ctx;
+    rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+    opts.rowset_ctx = &rowset_ctx;
+    opts.rowset_ctx->tablet_schema = _tablet_schema;
+    TabletColumn column = _tablet_schema->column(0);
+    _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+
+    std::unique_ptr<ColumnWriter> writer;
+    EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), 
&writer).ok());
+    EXPECT_TRUE(writer->init().ok());
+    EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+
+    // 5. write data: fill 1000 generated nested rows, convert the block, append, then flush data, ordinal index and zone map
+    auto olap_data_convertor = 
std::make_unique<vectorized::OlapBlockDataConvertor>();
+    auto block = _tablet_schema->create_block();
+    auto column_object = 
(*std::move(block.get_by_position(0).column)).mutate();
+    std::unordered_map<int, std::string> inserted_jsonstr;
+    auto path_with_size = 
VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
+                                                                               
 &inserted_jsonstr);
+    olap_data_convertor->add_column_data_convertor(column);
+    olap_data_convertor->set_source_content(&block, 0, 1000);
+    auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+    EXPECT_TRUE(result.ok());
+    EXPECT_TRUE(accessor != nullptr);
+    EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 
1000).ok());
+    st = writer->finish();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_data();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_ordinal_index();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = writer->write_zone_map();
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(file_writer->close().ok());
+    footer.set_num_rows(1000);
+
+    // 6. check footer: 12 metas - the VARIANT root first, the sparse column last, subcolumns in between
+    EXPECT_EQ(footer.columns_size(), 12);
+    auto column_meta = footer.columns(0);
+    EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    for (int i = 1; i < footer.columns_size() - 1; ++i) {
+        auto column_meta = footer.columns(i);
+        check_column_meta(column_meta, path_with_size);
+    }
+    check_sparse_column_meta(footer.columns(footer.columns_size() - 1), 
path_with_size);
+
+    // 7. check variant reader: reopen the written file and build a VariantColumnReader for column 0
+    io::FileReaderSPtr file_reader;
+    st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    ColumnReaderOptions read_opts;
+    read_opts.tablet_schema = _tablet_schema;
+    std::unique_ptr<ColumnReader> column_reader;
+    st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, 
&column_reader);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    auto variant_column_reader = 
assert_cast<VariantColumnReader*>(column_reader.get());
+    EXPECT_TRUE(variant_column_reader != nullptr);
+
+    // 8. check statistics: every non-null count the reader reports equals the count returned by the data generator
+    auto statistics = variant_column_reader->get_stats();
+    for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+        EXPECT_EQ(path_with_size[path], size);
+    }
+    for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
+        EXPECT_EQ(path_with_size[path], size);
+    }
+
+    // 9. check root: a query on the root column gets a HierarchicalDataReader that reproduces every inserted JSON row
+    ColumnIterator* it; // NOTE(review): reassigned by every new_iterator call below and never deleted in this test - confirm who owns the returned iterator
+    TabletColumn parent_column = _tablet_schema->column(0);
+    StorageReadOptions storage_read_opts;
+    storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+    st = variant_column_reader->new_iterator(&it, parent_column, 
&storage_read_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+    ColumnIteratorOptions column_iter_opts;
+    OlapReaderStatistics stats;
+    column_iter_opts.stats = &stats;
+    column_iter_opts.file_reader = file_reader.get();
+    st = it->init(column_iter_opts);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    MutableColumnPtr new_column_object = ColumnObject::create(3);
+    size_t nrows = 1000;
+    st = it->seek_to_ordinal(0);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    st = it->next_batch(&nrows, new_column_object);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_TRUE(stats.bytes_read > 0);
+
+    for (int i = 0; i < 1000; ++i) {
+        std::string value;
+        st = assert_cast<ColumnObject*>(new_column_object.get())
+                     ->serialize_one_row_to_string(i, &value);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_EQ(value, inserted_jsonstr[i]);
+    }
+
+    auto read_to_column_object = [&]() { // re-reads rows 0..999 from whatever `it` currently points to into a fresh new_column_object
+        new_column_object = ColumnObject::create(10);
+        nrows = 1000;
+        st = it->seek_to_ordinal(0);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        st = it->next_batch(&nrows, new_column_object);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_TRUE(stats.bytes_read > 0);
+        EXPECT_EQ(nrows, 1000);
+    };
+
+    auto check_key_stats = [&](const std::string& key_num) { // reads path key<N> hierarchically and compares per-row occurrence counts with path_with_size
+        std::string key = ".key" + key_num;
+        TabletColumn subcolumn_in_nested;
+        subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
+        subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+        subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
+        
subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() + 
key));
+        subcolumn_in_nested.set_variant_max_subcolumns_count(
+                parent_column.variant_max_subcolumns_count());
+        subcolumn_in_nested.set_is_nullable(true);
+
+        st = variant_column_reader->new_iterator(&it, subcolumn_in_nested, 
&storage_read_opts);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+        st = it->init(column_iter_opts);
+        EXPECT_TRUE(st.ok()) << st.msg();
+        read_to_column_object();
+
+        size_t key_count = 0; // rows whose serialized value contains "88" but not "nested<N>"
+        size_t key_nested_count = 0; // rows whose serialized value contains "nested<N>"
+        for (int row = 0; row < 1000; ++row) {
+            std::string value;
+            st = assert_cast<ColumnObject*>(new_column_object.get())
+                         ->serialize_one_row_to_string(row, &value);
+            EXPECT_TRUE(st.ok()) << st.msg();
+            if (value.find("nested" + key_num) != std::string::npos) {
+                key_nested_count++;
+            } else if (value.find("88") != std::string::npos) {
+                key_count++;
+            }
+        }
+        EXPECT_EQ(key_count, path_with_size["key" + key_num]);
+        EXPECT_EQ(key_nested_count, path_with_size["key" + key_num + ".nested" 
+ key_num]);
+    };
+
+    for (int i = 3; i < 10; ++i) { // only key3..key9 are checked this way
+        check_key_stats(std::to_string(i));
+    }
+
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+}
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/test/vec/columns/column_object_test.cpp 
b/be/test/vec/columns/column_object_test.cpp
index b08e57dd19e..7fd827c8919 100644
--- a/be/test/vec/columns/column_object_test.cpp
+++ b/be/test/vec/columns/column_object_test.cpp
@@ -5,9 +5,9 @@
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
-//
+
 //   http://www.apache.org/licenses/LICENSE-2.0
-//
+
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -15,790 +15,800 @@
 // specific language governing permissions and limitations
 // under the License.
 
-// #include "vec/columns/column_object.h"
-//
-// #include <gmock/gmock-more-matchers.h>
-// #include <gtest/gtest.h>
-// #include <rapidjson/prettywriter.h>
-// #include <stdio.h>
-//
-// #include "runtime/jsonb_value.h"
-// #include "testutil/variant_util.h"
-// #include "vec/common/string_ref.h"
-// #include "vec/core/field.h"
-// #include "vec/core/types.h"
-// #include "vec/data_types/data_type_array.h"
-// #include "vec/data_types/data_type_factory.hpp"
-//
-// using namespace doris::vectorized;
-//
-// using namespace doris;
-// // #define ADD_SUB_COLUMN(key) \
-// //    varaint->add_sub_column(PathInData(std::string_view(key)), 0);
-//
-// void convert_field_to_rapidjson(const vectorized::Field& field, 
rapidjson::Value& target,
-//                                 rapidjson::Document::AllocatorType& 
allocator) {
-//     switch (field.get_type()) {
-//     case vectorized::Field::Types::Null:
-//         target.SetNull();
-//         break;
-//     case vectorized::Field::Types::Int64:
-//         target.SetInt64(field.get<Int64>());
-//         break;
-//     case vectorized::Field::Types::Float64:
-//         target.SetDouble(field.get<Float64>());
-//         break;
-//     case vectorized::Field::Types::JSONB: {
-//         const auto& val = field.get<JsonbField>();
-//         JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), 
val.get_size());
-//         convert_jsonb_to_rapidjson(*json_val, target, allocator);
-//         break;
-//     }
-//     case vectorized::Field::Types::String: {
-//         const String& val = field.get<String>();
-//         target.SetString(val.data(), 
cast_set<rapidjson::SizeType>(val.size()));
-//         break;
-//     }
-//     case vectorized::Field::Types::Array: {
-//         const vectorized::Array& array = field.get<Array>();
-//         target.SetArray();
-//         for (const vectorized::Field& item : array) {
-//             rapidjson::Value val;
-//             convert_field_to_rapidjson(item, val, allocator);
-//             target.PushBack(val, allocator);
-//         }
-//         break;
-//     }
-//     case vectorized::Field::Types::VariantMap: {
-//         const vectorized::VariantMap& map = field.get<VariantMap>();
-//         target.SetObject();
-//         for (const auto& item : map) {
-//             if (item.second.is_null()) {
-//                 continue;
-//             }
-//             rapidjson::Value key;
-//             key.SetString(item.first.get_path().data(),
-//                           
cast_set<rapidjson::SizeType>(item.first.get_path().size()));
-//             rapidjson::Value val;
-//             convert_field_to_rapidjson(item.second, val, allocator);
-//             if (val.IsNull() && item.first.empty()) {
-//                 // skip null value with empty key, indicate the null json 
value of root in variant map,
-//                 // usally padding in nested arrays
-//                 continue;
-//             }
-//             target.AddMember(key, val, allocator);
-//         }
-//         break;
-//     }
-//     default:
-//         throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field 
type: {}",
-//                                field.get_type_name());
-//         break;
-//     }
-// }
-//
-// void convert_variant_map_to_rapidjson(const vectorized::VariantMap& map, 
rapidjson::Value& target,
-//                                       rapidjson::Document::AllocatorType& 
allocator) {
-//     target.SetObject();
-//     for (const auto& item : map) {
-//         if (item.second.is_null()) {
-//             continue;
-//         }
-//         rapidjson::Value key;
-//         key.SetString(item.first.get_path().data(),
-//                       
cast_set<rapidjson::SizeType>(item.first.get_path().size()));
-//         rapidjson::Value val;
-//         convert_field_to_rapidjson(item.second, val, allocator);
-//         if (val.IsNull() && item.first.empty()) {
-//             // skip null value with empty key, indicate the null json value 
of root in variant map,
-//             // usally padding in nested arrays
-//             continue;
-//         }
-//         target.AddMember(key, val, allocator);
-//     }
-// }
-//
-// void convert_array_to_rapidjson(const vectorized::Array& array, 
rapidjson::Value& target,
-//                                 rapidjson::Document::AllocatorType& 
allocator) {
-//     target.SetArray();
-//     for (const vectorized::Field& item : array) {
-//         rapidjson::Value val;
-//         convert_field_to_rapidjson(item, val, allocator);
-//         target.PushBack(val, allocator);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, insert_try_insert) {
-//     auto v = VariantUtil::construct_dst_varint_column();
-//     FieldInfo info;
-//     info.scalar_type_id = TypeIndex::Nothing;
-//     info.num_dimensions = 0;
-//     PathInData path("v.f");
-//     auto sub = v->get_subcolumn(path);
-//     Int64 value = 43;
-//     sub->insert(value, info);
-//
-//     info.num_dimensions = 1;
-//     sub->insert(value, info);
-//
-//     info.num_dimensions = 2;
-//     sub->insert(value, info);
-// }
-//
-// TEST(ColumnVariantTest, basic_finalize) {
-//     auto variant = VariantUtil::construct_basic_varint_column();
-//     // 4. finalize
-//     
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(variant->size(), 10);
-//
-//     // check finalized subcolumn
-//     // 5 subcolumn + 1 root
-//     EXPECT_EQ(variant->subcolumns.size(), 6);
-//     for (const auto& column : variant->subcolumns) {
-//         if (column->data.is_root) {
-//             continue;
-//         }
-//         EXPECT_EQ(column->data.data.size(), 1);
-//     }
-//
-//     // check sparse column
-//     const auto& offsets = variant->serialized_sparse_column_offsets();
-//     for (int row = 0; row < 5; ++row) {
-//         EXPECT_EQ(offsets[row], 0);
-//     }
-//     for (int row = 5; row < 10; ++row) {
-//         EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, basic_deserialize) {
-//     auto variant = VariantUtil::construct_basic_varint_column();
-//
-//     // 4. finalize
-//     
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(variant->size(), 10);
-//
-//     const auto& [path, value] = variant->get_sparse_data_paths_and_values();
-//     const auto& offsets = variant->serialized_sparse_column_offsets();
-//     for (size_t row = 5; row < 10; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.b.d", 5));
-//         auto pair = variant->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int64>(), 30);
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data2, StringRef("v.c.d", 5));
-//         EXPECT_EQ(pair2.first.get<Int64>(), 30);
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(pair3.first.get<String>(), "50");
-//         EXPECT_EQ(start, end);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, basic_inset_range_from) {
-//     auto src = VariantUtil::construct_basic_varint_column();
-//     EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(src->size(), 10);
-//
-//     // dst is an empty column, has 5 subcolumn + 1 root
-//     auto dst = VariantUtil::construct_dst_varint_column();
-//
-//     // subcolumn->subcolumn          v.b v.f v.e
-//     // subcolumn->sparse_column      v.a v.c
-//     // sparse_column->subcolumn      v.b.d v.c.d
-//     // sparse_column->sparse_column  v.d.d
-//     dst->insert_range_from(*src, 0, 10);
-//     dst->finalize();
-//     EXPECT_EQ(dst->size(), 10);
-//
-//     // 5 subcolumn
-//     EXPECT_EQ(dst->subcolumns.size(), 6);
-//     ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
-//     std::sort(
-//             dst_subcolumns.begin(), dst_subcolumns.end(),
-//             [](const auto& lhsItem, const auto& rhsItem) { return 
lhsItem->path < rhsItem->path; });
-//
-//     for (const auto& column : dst_subcolumns) {
-//         if (column->data.is_root) {
-//             continue;
-//         }
-//         EXPECT_EQ(column->data.data.size(), 1);
-//         EXPECT_EQ(column->data.data[0]->size(), 10);
-//         if (column->path.get_path().size() == 3) {
-//             EXPECT_EQ(column->data.get_non_null_value_size(), 10);
-//         } else {
-//             EXPECT_EQ(column->path.get_path().size(), 5);
-//             EXPECT_EQ(column->data.get_non_null_value_size(), 5);
-//             for (size_t row = 0; row != 5; ++row) {
-//                 EXPECT_TRUE(column->data.data[0]->is_null_at(row));
-//             }
-//             for (size_t row = 5; row != 10; ++row) {
-//                 EXPECT_EQ((*column->data.data[0])[row].get<Int64>(), 30);
-//             }
-//         }
-//     }
-//
-//     // check sparse column
-//     const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-//     const auto& offsets = dst->serialized_sparse_column_offsets();
-//
-//     // v.a v.c
-//     for (int row = 0; row < 5; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.a", 3));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-//         auto data2 = path->get_data_at(start);
-//         EXPECT_EQ(data2, StringRef("v.c", 3));
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair2.first.get<Int64>(), 20);
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     // v.a v.c v.d.d
-//     for (int row = 5; row < 10; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.a", 3));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-//         auto data2 = path->get_data_at(start);
-//         EXPECT_EQ(data2, StringRef("v.c", 3));
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair2.first.get<Int64>(), 20);
-//
-//         auto data3 = path->get_data_at(start);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair3.first.get<String>(), "50");
-//
-//         EXPECT_EQ(start, end);
-//     }
-// }
-//
-// auto convert_to_jsonb_field(auto serde, auto& column) {
-//     vectorized::DataTypeSerDe::FormatOptions options;
-//     options.escape_char = '\\';
-//     auto tmp_col = ColumnString::create();
-//     VectorBufferWriter write_buffer(*tmp_col.get());
-//     EXPECT_TRUE(serde->serialize_column_to_json(column, 0, 1, write_buffer, 
options).ok());
-//
-//     write_buffer.commit();
-//     auto str_ref = tmp_col->get_data_at(0);
-//     Slice data((char*)(str_ref.data), str_ref.size);
-//
-//     auto jsonb_type = 
doris::vectorized::DataTypeFactory::instance().create_data_type(
-//             TypeIndex::JSONB, false);
-//     auto jsonb_serde = jsonb_type->get_serde();
-//     auto jsonb_column = jsonb_type->create_column();
-//
-//     DataTypeSerDe::FormatOptions format_options;
-//     format_options.converted_from_string = true;
-//     EXPECT_TRUE(
-//             jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, 
data, format_options).ok());
-//     auto res = jsonb_column->get_data_at(0);
-//     return JsonbField(res.data, res.size);
-// }
-//
-// auto convert_string_to_jsonb_field(auto& column) {
-//     auto str_ref = column.get_data_at(0);
-//     Slice data((char*)(str_ref.data), str_ref.size);
-//
-//     auto jsonb_type = 
doris::vectorized::DataTypeFactory::instance().create_data_type(
-//             TypeIndex::JSONB, false);
-//     auto jsonb_serde = jsonb_type->get_serde();
-//     auto jsonb_column = jsonb_type->create_column();
-//     DataTypeSerDe::FormatOptions format_options;
-//     format_options.converted_from_string = true;
-//     format_options.escape_char = '\\';
-//
-//     EXPECT_TRUE(
-//             jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, 
data, format_options).ok());
-//     auto res = jsonb_column->get_data_at(0);
-//     return JsonbField(res.data, res.size);
-// }
-//
-// doris::vectorized::Field get_jsonb_field(std::string_view type) {
-//     static std::unordered_map<std::string_view, doris::vectorized::Field> 
field_map;
-//     if (field_map.empty()) {
-//         DataTypePtr data_type_int = 
doris::vectorized::DataTypeFactory::instance().create_data_type(
-//                 TypeIndex::Int8, false);
-//         DataTypePtr data_type_array_int =
-//                 
std::make_shared<doris::vectorized::DataTypeArray>(data_type_int);
-//         auto array_column_int = data_type_array_int->create_column();
-//         array_column_int->insert(VariantUtil::get_field("array_int"));
-//         auto array_serde_int = data_type_array_int->get_serde();
-//         field_map["array_int"] = convert_to_jsonb_field(array_serde_int, 
*array_column_int);
-//
-//         DataTypePtr data_type_str = 
doris::vectorized::DataTypeFactory::instance().create_data_type(
-//                 TypeIndex::String, false);
-//         DataTypePtr data_type_array_str =
-//                 
std::make_shared<doris::vectorized::DataTypeArray>(data_type_str);
-//         auto array_column_str = data_type_array_str->create_column();
-//         array_column_str->insert(VariantUtil::get_field("array_str"));
-//         auto array_serde_str = data_type_array_str->get_serde();
-//         field_map["array_str"] = convert_to_jsonb_field(array_serde_str, 
*array_column_str);
-//
-//         auto column_int = data_type_int->create_column();
-//         column_int->insert(VariantUtil::get_field("int"));
-//         auto serde_int = data_type_int->get_serde();
-//         field_map["int"] = convert_to_jsonb_field(serde_int, *column_int);
-//
-//         // auto column_str = data_type_str->create_column();
-//         // column_str->insert(VariantUtil::get_field("string"));
-//         // field_map["string"] = convert_string_to_jsonb_field(*column_str);
-//     }
-//     return field_map[type];
-// }
-//
-// // std::string convert_jsonb_field_to_string(doris::vectorized::Field 
jsonb) {
-// //     const auto& val = jsonb.get<JsonbField>();
-// //     const JsonbValue* json_val = 
JsonbDocument::createValue(val.get_value(), val.get_size());
-//
-// //     rapidjson::Document doc;
-// //     doc.SetObject();
-// //     rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
-// //     rapidjson::Value json_value;
-// //     convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
-// //     doc.AddMember("value", json_value, allocator);
-// //     rapidjson::StringBuffer buffer;
-// //     rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
-// //     doc.Accept(writer);
-// //     return std::string(buffer.GetString());
-// // }
-//
-// std::string convert_field_to_string(doris::vectorized::Field array) {
+#include "vec/columns/column_object.h"
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+#include <rapidjson/prettywriter.h>
+#include <stdio.h>
+
+#include "runtime/jsonb_value.h"
+#include "testutil/variant_util.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_factory.hpp"
+
+using namespace doris::vectorized;
+
+using namespace doris;
+// #define ADD_SUB_COLUMN(key) \
+//    varaint->add_sub_column(PathInData(std::string_view(key)), 0);
+
+// Recursively converts `field` into `target`, a rapidjson value backed by `allocator`.
+//
+// Null, Int64, Float64, JSONB, String, Array and VariantMap fields are handled; any other
+// field type raises doris::Exception(INTERNAL_ERROR). Inside a VariantMap, entries whose
+// field is null are dropped, and so is an entry that converts to JSON null under an empty
+// path.
+//
+// String payloads and object keys are copied into `allocator`. The two-argument
+// rapidjson SetString() only records the caller's pointer, which would leave `target`
+// pointing at memory owned by `field`.
+void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target,
+                                rapidjson::Document::AllocatorType& allocator) {
+    switch (field.get_type()) {
+    case vectorized::Field::Types::Null:
+        target.SetNull();
+        break;
+    case vectorized::Field::Types::Int64:
+        target.SetInt64(field.get<Int64>());
+        break;
+    case vectorized::Field::Types::Float64:
+        target.SetDouble(field.get<Float64>());
+        break;
+    case vectorized::Field::Types::JSONB: {
+        const auto& val = field.get<JsonbField>();
+        JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size());
+        // createValue hands back a raw pointer; refuse to dereference it when it is null.
+        if (json_val == nullptr) {
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR, "invalid jsonb value, size: {}",
+                                   val.get_size());
+        }
+        convert_jsonb_to_rapidjson(*json_val, target, allocator);
+        break;
+    }
+    case vectorized::Field::Types::String: {
+        const String& val = field.get<String>();
+        // Copying overload: `target` owns its bytes independently of `field`.
+        target.SetString(val.data(), cast_set<rapidjson::SizeType>(val.size()), allocator);
+        break;
+    }
+    case vectorized::Field::Types::Array: {
+        const vectorized::Array& array = field.get<Array>();
+        target.SetArray();
+        for (const vectorized::Field& item : array) {
+            rapidjson::Value val;
+            convert_field_to_rapidjson(item, val, allocator);
+            target.PushBack(val, allocator);
+        }
+        break;
+    }
+    case vectorized::Field::Types::VariantMap: {
+        const vectorized::VariantMap& map = field.get<VariantMap>();
+        target.SetObject();
+        for (const auto& item : map) {
+            if (item.second.is_null()) {
+                continue;
+            }
+            rapidjson::Value val;
+            convert_field_to_rapidjson(item.second, val, allocator);
+            if (val.IsNull() && item.first.empty()) {
+                // skip null value with empty key, indicate the null json value of root in
+                // variant map, usually padding in nested arrays
+                continue;
+            }
+            // Build the key only for entries that are kept, so skipped entries do not
+            // consume allocator memory for a copied key.
+            const auto& path = item.first.get_path();
+            rapidjson::Value key;
+            key.SetString(path.data(), cast_set<rapidjson::SizeType>(path.size()), allocator);
+            target.AddMember(key, val, allocator);
+        }
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}",
+                               field.get_type_name());
+    }
+}
+
+// Converts `map` into a JSON object stored in `target`.
+//
+// Entries whose field is null are dropped, and so is an entry that converts to JSON null
+// under an empty path. Keys are copied into `allocator`: the two-argument rapidjson
+// SetString() only records the caller's pointer, which would tie `target` to the lifetime
+// of the path strings held by `map`.
+void convert_variant_map_to_rapidjson(const vectorized::VariantMap& map, rapidjson::Value& target,
+                                      rapidjson::Document::AllocatorType& allocator) {
+    target.SetObject();
+    for (const auto& item : map) {
+        if (item.second.is_null()) {
+            continue;
+        }
+        rapidjson::Value val;
+        convert_field_to_rapidjson(item.second, val, allocator);
+        if (val.IsNull() && item.first.empty()) {
+            // skip null value with empty key, indicate the null json value of root in
+            // variant map, usually padding in nested arrays
+            continue;
+        }
+        // Build the key only for entries that are kept, so skipped entries do not consume
+        // allocator memory for a copied key.
+        const auto& path = item.first.get_path();
+        rapidjson::Value key;
+        key.SetString(path.data(), cast_set<rapidjson::SizeType>(path.size()), allocator);
+        target.AddMember(key, val, allocator);
+    }
+}
+
+// Resets `target` to an empty JSON array, then appends the rapidjson form of every
+// element of `array`, in order. Element values are built with `allocator`.
+void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target,
+                                rapidjson::Document::AllocatorType& allocator) {
+    target.SetArray();
+    for (const auto& element : array) {
+        rapidjson::Value converted;
+        convert_field_to_rapidjson(element, converted, allocator);
+        target.PushBack(converted, allocator);
+    }
+}
+
+// Inserts the same Int64 value into subcolumn "v.f" three times, with the FieldInfo
+// reporting 0, 1 and then 2 dimensions. The test makes no assertions; it only exercises
+// the insert path.
+TEST(ColumnVariantTest, insert_try_insert) {
+    auto variant = VariantUtil::construct_dst_varint_column();
+
+    FieldInfo info;
+    info.scalar_type_id = TypeIndex::Nothing;
+
+    PathInData target_path("v.f");
+    auto subcolumn = variant->get_subcolumn(target_path);
+    Int64 value = 43;
+
+    for (decltype(info.num_dimensions) dims = 0; dims <= 2; ++dims) {
+        info.num_dimensions = dims;
+        subcolumn->insert(value, info);
+    }
+}
+
+// Finalizes the basic variant fixture in WRITE_MODE, calls
+// pick_subcolumns_to_sparse_column({}), and then checks the row count, the subcolumn
+// layout and the per-row sparse-column offsets.
+TEST(ColumnVariantTest, basic_finalize) {
+    auto column = VariantUtil::construct_basic_varint_column();
+
+    // 4. finalize
+    const auto finalize_status = column->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
+    EXPECT_TRUE(finalize_status.ok());
+    const auto pick_status = column->pick_subcolumns_to_sparse_column({});
+    EXPECT_TRUE(pick_status.ok());
+    EXPECT_EQ(column->size(), 10);
+
+    // Finalized layout: 5 subcolumns + 1 root; every non-root entry holds one data part.
+    EXPECT_EQ(column->subcolumns.size(), 6);
+    for (const auto& entry : column->subcolumns) {
+        if (!entry->data.is_root) {
+            EXPECT_EQ(entry->data.data.size(), 1);
+        }
+    }
+
+    // Sparse column: the offsets of rows 0-4 are all 0, and each of rows 5-9 advances the
+    // offset by 3.
+    const auto& offsets = column->serialized_sparse_column_offsets();
+    for (int row = 0; row < 10; ++row) {
+        if (row < 5) {
+            EXPECT_EQ(offsets[row], 0);
+        } else {
+            EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
+        }
+    }
+}
+
+// Finalizes the basic variant fixture and, for rows 5-9, walks that row's slice of the
+// sparse column, expecting exactly three entries in this order:
+// v.b.d -> Int64 30, v.c.d -> Int64 30, v.d.d -> String "50".
+TEST(ColumnVariantTest, basic_deserialize) {
+    auto column = VariantUtil::construct_basic_varint_column();
+
+    // 4. finalize
+    const auto finalize_status = column->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
+    EXPECT_TRUE(finalize_status.ok());
+    const auto pick_status = column->pick_subcolumns_to_sparse_column({});
+    EXPECT_TRUE(pick_status.ok());
+    EXPECT_EQ(column->size(), 10);
+
+    const auto& [keys, values] = column->get_sparse_data_paths_and_values();
+    const auto& offsets = column->serialized_sparse_column_offsets();
+    for (size_t row = 5; row < 10; ++row) {
+        // `cursor` moves through the entries of this row; `row_end` is one past the last.
+        size_t cursor = offsets[row - 1];
+        const size_t row_end = offsets[row];
+
+        EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.b.d", 5));
+        auto first_entry = column->deserialize_from_sparse_column(values, cursor++);
+        EXPECT_EQ(first_entry.first.get<Int64>(), 30);
+
+        EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.c.d", 5));
+        auto second_entry = column->deserialize_from_sparse_column(values, cursor++);
+        EXPECT_EQ(second_entry.first.get<Int64>(), 30);
+
+        EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.d.d", 5));
+        auto third_entry = column->deserialize_from_sparse_column(values, cursor++);
+        EXPECT_EQ(third_entry.first.get<String>(), "50");
+
+        // All three entries consumed: the cursor must sit exactly at the row boundary.
+        EXPECT_EQ(cursor, row_end);
+    }
+}
+
+// Copies all 10 rows of a finalized basic variant column into the destination fixture
+// with insert_range_from(), finalizes the destination, and checks both its subcolumns and
+// the contents of its sparse column.
+TEST(ColumnVariantTest, basic_inset_range_from) {
+    auto source = VariantUtil::construct_basic_varint_column();
+    EXPECT_TRUE(source->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(source->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(source->size(), 10);
+
+    // The destination is an empty column that has 5 subcolumns + 1 root.
+    auto target = VariantUtil::construct_dst_varint_column();
+
+    // Expected routing of the source paths:
+    //   subcolumn     -> subcolumn      v.b v.f v.e
+    //   subcolumn     -> sparse_column  v.a v.c
+    //   sparse_column -> subcolumn      v.b.d v.c.d
+    //   sparse_column -> sparse_column  v.d.d
+    target->insert_range_from(*source, 0, 10);
+    target->finalize();
+    EXPECT_EQ(target->size(), 10);
+
+    // 5 subcolumns + root, inspected in path order.
+    EXPECT_EQ(target->subcolumns.size(), 6);
+    ColumnObject::Subcolumns sorted_subcolumns = target->subcolumns;
+    std::sort(sorted_subcolumns.begin(), sorted_subcolumns.end(),
+              [](const auto& lhs, const auto& rhs) { return lhs->path < rhs->path; });
+
+    for (const auto& entry : sorted_subcolumns) {
+        if (entry->data.is_root) {
+            continue;
+        }
+        auto& subcolumn = entry->data;
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.data[0]->size(), 10);
+
+        const auto path_length = entry->path.get_path().size();
+        if (path_length != 3) {
+            // 5-character paths: null for rows 0-4, Int64 30 for rows 5-9.
+            EXPECT_EQ(path_length, 5);
+            EXPECT_EQ(subcolumn.get_non_null_value_size(), 5);
+            for (size_t row = 0; row != 10; ++row) {
+                if (row < 5) {
+                    EXPECT_TRUE(subcolumn.data[0]->is_null_at(row));
+                } else {
+                    EXPECT_EQ((*subcolumn.data[0])[row].get<Int64>(), 30);
+                }
+            }
+        } else {
+            // 3-character paths are expected to be non-null in every row.
+            EXPECT_EQ(subcolumn.get_non_null_value_size(), 10);
+        }
+    }
+
+    // check sparse column
+    const auto& [keys, values] = target->get_sparse_data_paths_and_values();
+    const auto& offsets = target->serialized_sparse_column_offsets();
+
+    // Rows 0-4 hold v.a and v.c; rows 5-9 additionally hold v.d.d.
+    // NOTE(review): row 0 reads offsets[-1]; this relies on the offsets container
+    // tolerating index -1. Confirm against the container type.
+    for (int row = 0; row < 10; ++row) {
+        size_t cursor = offsets[row - 1];
+        const size_t row_end = offsets[row];
+
+        EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.a", 3));
+        auto a_entry = target->deserialize_from_sparse_column(values, cursor++);
+        EXPECT_EQ(a_entry.first.get<Int64>(), 20);
+
+        EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.c", 3));
+        auto c_entry = target->deserialize_from_sparse_column(values, cursor++);
+        EXPECT_EQ(c_entry.first.get<Int64>(), 20);
+
+        if (row >= 5) {
+            EXPECT_EQ(keys->get_data_at(cursor), StringRef("v.d.d", 5));
+            auto d_entry = target->deserialize_from_sparse_column(values, cursor++);
+            EXPECT_EQ(d_entry.first.get<String>(), "50");
+        }
+
+        EXPECT_EQ(cursor, row_end);
+    }
+}
+
+// Serializes row 0 of `column` to JSON text through `serde`, parses that text back with
+// the JSONB serde, and returns the parsed cell as a JsonbField.
+auto convert_to_jsonb_field(auto serde, auto& column) {
+    // Stage 1: row 0 of `column` -> JSON text, written into a scratch string column.
+    auto json_text_column = ColumnString::create();
+    VectorBufferWriter writer(*json_text_column.get());
+    vectorized::DataTypeSerDe::FormatOptions serialize_options;
+    serialize_options.escape_char = '\\';
+    EXPECT_TRUE(serde->serialize_column_to_json(column, 0, 1, writer, serialize_options).ok());
+    writer.commit();
+
+    const auto json_text = json_text_column->get_data_at(0);
+    Slice json_slice(const_cast<char*>(json_text.data), json_text.size);
+
+    // Stage 2: JSON text -> one JSONB cell.
+    auto jsonb_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
+            TypeIndex::JSONB, false);
+    auto jsonb_serde = jsonb_type->get_serde();
+    auto jsonb_column = jsonb_type->create_column();
+
+    DataTypeSerDe::FormatOptions parse_options;
+    parse_options.converted_from_string = true;
+    const auto parse_status =
+            jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, json_slice, parse_options);
+    EXPECT_TRUE(parse_status.ok());
+
+    const auto cell = jsonb_column->get_data_at(0);
+    return JsonbField(cell.data, cell.size);
+}
+
+// Takes the bytes of row 0 of `column`, parses them as JSON with the JSONB serde, and
+// returns the parsed cell as a JsonbField.
+auto convert_string_to_jsonb_field(auto& column) {
+    const auto json_text = column.get_data_at(0);
+    Slice json_slice(const_cast<char*>(json_text.data), json_text.size);
+
+    auto jsonb_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
+            TypeIndex::JSONB, false);
+    auto jsonb_serde = jsonb_type->get_serde();
+    auto jsonb_column = jsonb_type->create_column();
+
+    DataTypeSerDe::FormatOptions parse_options;
+    parse_options.converted_from_string = true;
+    parse_options.escape_char = '\\';
+
+    const auto parse_status =
+            jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, json_slice, parse_options);
+    EXPECT_TRUE(parse_status.ok());
+
+    const auto cell = jsonb_column->get_data_at(0);
+    return JsonbField(cell.data, cell.size);
+}
+
+// Returns the JSONB-encoded fixture field registered under `type`.
+//
+// Registered names: "array_int", "array_str" and "int". Each value is built once, on the
+// first call, by inserting the matching VariantUtil::get_field() fixture into a column of
+// the corresponding type and passing it through convert_to_jsonb_field().
+//
+// An unknown `type` yields a default-constructed Field. The lookup uses find() rather
+// than operator[] so that an unknown name is never inserted: the key is a
+// std::string_view, and storing the caller's view in a static map would leave a key that
+// refers to memory the map does not own.
+doris::vectorized::Field get_jsonb_field(std::string_view type) {
+    using FieldMap = std::unordered_map<std::string_view, doris::vectorized::Field>;
+    // Every key below is a string literal, so the stored views stay valid for the whole
+    // program.
+    static const FieldMap field_map = [] {
+        FieldMap fields;
+
+        DataTypePtr data_type_int =
+                doris::vectorized::DataTypeFactory::instance().create_data_type(TypeIndex::Int8,
+                                                                                false);
+        DataTypePtr data_type_array_int =
+                std::make_shared<doris::vectorized::DataTypeArray>(data_type_int);
+        auto array_column_int = data_type_array_int->create_column();
+        array_column_int->insert(VariantUtil::get_field("array_int"));
+        auto array_serde_int = data_type_array_int->get_serde();
+        fields["array_int"] = convert_to_jsonb_field(array_serde_int, *array_column_int);
+
+        DataTypePtr data_type_str =
+                doris::vectorized::DataTypeFactory::instance().create_data_type(
+                        TypeIndex::String, false);
+        DataTypePtr data_type_array_str =
+                std::make_shared<doris::vectorized::DataTypeArray>(data_type_str);
+        auto array_column_str = data_type_array_str->create_column();
+        array_column_str->insert(VariantUtil::get_field("array_str"));
+        auto array_serde_str = data_type_array_str->get_serde();
+        fields["array_str"] = convert_to_jsonb_field(array_serde_str, *array_column_str);
+
+        auto column_int = data_type_int->create_column();
+        column_int->insert(VariantUtil::get_field("int"));
+        auto serde_int = data_type_int->get_serde();
+        fields["int"] = convert_to_jsonb_field(serde_int, *column_int);
+
+        // auto column_str = data_type_str->create_column();
+        // column_str->insert(VariantUtil::get_field("string"));
+        // fields["string"] = convert_string_to_jsonb_field(*column_str);
+        return fields;
+    }();
+
+    const auto it = field_map.find(type);
+    return it == field_map.end() ? doris::vectorized::Field() : it->second;
+}
+
+// std::string convert_jsonb_field_to_string(doris::vectorized::Field jsonb) {
+//     const auto& val = jsonb.get<JsonbField>();
+//     const JsonbValue* json_val = 
JsonbDocument::createValue(val.get_value(), val.get_size());
+
 //     rapidjson::Document doc;
 //     doc.SetObject();
 //     rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
 //     rapidjson::Value json_value;
-//     // DataTypeSerDe::convert_field_to_rapidjson(array, json_value, 
allocator);
+//     convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
 //     doc.AddMember("value", json_value, allocator);
 //     rapidjson::StringBuffer buffer;
 //     rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
 //     doc.Accept(writer);
 //     return std::string(buffer.GetString());
 // }
-//
-// TEST(ColumnVariantTest, is_null_at) {
-//     auto v = VariantUtil::construct_dst_varint_column();
-//     PathInData path("v.f");
-//     auto sub = v->get_subcolumn(path);
-//     std::cout << sub->get_least_common_typeBase()->get_name() << std::endl;
-//     EXPECT_TRUE(sub->is_null_at(0));
-//
-//     auto v1 = VariantUtil::construct_advanced_varint_column();
-//     PathInData path1("v.b.d");
-//     auto sub1 = v1->get_subcolumn(path1);
-//     EXPECT_TRUE(sub1->is_null_at(2));
-//     EXPECT_ANY_THROW(sub1->is_null_at(16));
-//     vectorized::Field f;
-//     EXPECT_ANY_THROW(sub1->get(16, f));
-//     std::cout << sub1->num_rows << std::endl;
-//     EXPECT_NO_THROW(sub1->resize(sub1->num_rows));
-//
-//     auto [sparse_column_keys, sparse_column_values] = 
v1->get_sparse_data_paths_and_values();
-//     std::string_view pa("v.a");
-//     EXPECT_NO_THROW(
-//             sub1->serialize_to_sparse_column(sparse_column_keys, pa, 
sparse_column_values, 2));
-//     EXPECT_ANY_THROW(
-//             sub1->serialize_to_sparse_column(sparse_column_keys, pa, 
sparse_column_values, 16));
-// }
-//
-// TEST(ColumnVariantTest, advanced_finalize) {
-//     auto variant = VariantUtil::construct_advanced_varint_column();
-//
-//     // 4. finalize
-//     
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(variant->size(), 15);
-//
-//     // check finalized subcolumn
-//     // 5 subcolumn + 1 root
-//     EXPECT_EQ(variant->subcolumns.size(), 6);
-//     for (const auto& column : variant->subcolumns) {
-//         if (column->data.is_root) {
-//             continue;
-//         }
-//         EXPECT_EQ(column->data.data.size(), 1);
-//     }
-//
-//     // check sparse column
-//     const auto& offsets = variant->serialized_sparse_column_offsets();
-//     for (int row = 0; row < 5; ++row) {
-//         EXPECT_EQ(offsets[row] - offsets[row - 1], 0);
-//     }
-//     for (int row = 5; row < 15; ++row) {
-//         EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
-//     }
-//
-//     {
-//         // Test fill_path_column_from_sparse_data
-//         auto map = std::make_unique<NullMap>(15, 0);
-//         vectorized::ColumnObject::fill_path_column_from_sparse_data(
-//                 *variant->get_subcolumn({}) /*root*/, map.get(), StringRef 
{"array"},
-//                 variant->get_sparse_column(), 0, 5);
-//         vectorized::ColumnObject::fill_path_column_from_sparse_data(
-//                 *variant->get_subcolumn({}) /*root*/, map.get(), StringRef 
{"array"},
-//                 variant->get_sparse_column(), 5, 15);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, advanced_deserialize) {
-//     auto variant = VariantUtil::construct_advanced_varint_column();
-//
-//     // 4. finalize
-//     
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(variant->size(), 15);
-//
-//     const auto& [path, value] = variant->get_sparse_data_paths_and_values();
-//     const auto& offsets = variant->serialized_sparse_column_offsets();
-//     for (size_t row = 5; row < 10; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         auto pair = variant->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data, StringRef("v.b.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair.first),
-//                   convert_field_to_string(get_jsonb_field("array_int")));
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data2, StringRef("v.c.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair2.first),
-//                   
convert_field_to_string(VariantUtil::get_field("string")));
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair3.first),
-//                   convert_field_to_string(get_jsonb_field("array_int")));
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     for (size_t row = 10; row < 15; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         auto pair = variant->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data, StringRef("v.b.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair.first),
-//                   convert_field_to_string(get_jsonb_field("array_str")));
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data2, StringRef("v.c.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair2.first),
-//                   convert_field_to_string(get_jsonb_field("int")));
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = variant->deserialize_from_sparse_column(value, 
start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair3.first),
-//                   convert_field_to_string(get_jsonb_field("array_str")));
-//         EXPECT_EQ(start, end);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, advanced_insert_range_from) {
-//     auto src = VariantUtil::construct_advanced_varint_column();
-//     EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(src->size(), 15);
-//
-//     auto dst = VariantUtil::construct_dst_varint_column();
-//
-//     // subcolumn->subcolumn          v.b v.f v.e
-//     // subcolumn->sparse_column      v.a v.c
-//     // sparse_column->subcolumn      v.b.d v.c.d
-//     // sparse_column->sparse_column  v.d.d
-//     dst->insert_range_from(*src, 0, src->size());
-//     dst->finalize();
-//     EXPECT_EQ(dst->size(), 15);
-//
-//     EXPECT_EQ(dst->subcolumns.size(), 6);
-//     ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
-//
-//     std::sort(
-//             dst_subcolumns.begin(), dst_subcolumns.end(),
-//             [](const auto& lhsItem, const auto& rhsItem) { return 
lhsItem->path < rhsItem->path; });
-//
-//     // subcolumns
-//     for (const auto& column : dst_subcolumns) {
-//         if (column->data.is_root) {
-//             continue;
-//         }
-//         EXPECT_EQ(column->data.data.size(), 1);
-//         EXPECT_EQ(column->data.data[0]->size(), 15);
-//
-//         if (column->path.get_path().size() == 3) {
-//             EXPECT_EQ(column->data.get_non_null_value_size(), 15);
-//             if (column->path.get_path() == "v.b") {
-//                 EXPECT_EQ(assert_cast<const 
DataTypeNullable*>(column->data.data_types[0].get())
-//                                   ->get_nested_type()
-//                                   ->get_type_id(),
-//                           TypeIndex::JSONB);
-//             }
-//         } else if (column->path.get_path().size() == 5) {
-//             EXPECT_EQ(column->data.get_non_null_value_size(), 10);
-//             EXPECT_EQ(assert_cast<const 
DataTypeNullable*>(column->data.data_types[0].get())
-//                               ->get_nested_type()
-//                               ->get_type_id(),
-//                       TypeIndex::JSONB);
-//             for (size_t row = 0; row < 5; ++row) {
-//                 EXPECT_TRUE(column->data.data[0]->is_null_at(row));
-//             }
-//         }
-//     }
-//
-//     // sparse columns
-//     const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-//     const auto& offsets = dst->serialized_sparse_column_offsets();
-//
-//     // v.a v.c
-//     for (int row = 0; row < 5; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.a", 3));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-//         auto data2 = path->get_data_at(start);
-//         EXPECT_EQ(data2, StringRef("v.c", 3));
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(convert_field_to_string(pair2.first),
-//                   
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     for (int row = 5; row < 10; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data, StringRef("v.a", 3));
-//         EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data2, StringRef("v.c", 3));
-//         EXPECT_EQ(convert_field_to_string(pair2.first),
-//                   
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair3.first),
-//                   convert_field_to_string(get_jsonb_field("array_int")));
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     for (int row = 10; row < 15; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data, StringRef("v.a", 3));
-//         EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data2, StringRef("v.c", 3));
-//         EXPECT_EQ(convert_field_to_string(pair2.first),
-//                   
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(convert_field_to_string(pair3.first),
-//                   convert_field_to_string(get_jsonb_field("array_str")));
-//
-//         EXPECT_EQ(start, end);
-//     }
-// }
-//
-// TEST(ColumnVariantTest, empty_inset_range_from) {
-//     auto src = VariantUtil::construct_varint_column_only_subcolumns();
-//     EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(src->size(), 6);
-//
-//     // dst is an empty column
-//     auto dst = ColumnObject::create(5);
-//
-//     // subcolumn->subcolumn          v.a v.b v.c v.f v.e
-//     dst->insert_range_from(*src, 0, 6);
-//     EXPECT_EQ(dst->size(), 6);
-//
-//     // 5 subcolumn
-//     EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-//     for (const auto& column : dst->subcolumns) {
-//         if (column->data.is_root) {
-//             EXPECT_EQ(column->data.data.size(), 1);
-//             EXPECT_EQ(column->data.data[0]->size(), 6);
-//             EXPECT_EQ(column->data.get_non_null_value_size(), 1);
-//             continue;
-//         }
-//         EXPECT_EQ(column->data.data.size(), 1);
-//         EXPECT_EQ(column->data.data[0]->size(), 6);
-//         EXPECT_EQ(column->data.get_non_null_value_size(), 5);
-//     }
-//
-//     // empty sparse column
-//     const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-//     const auto& offsets = dst->serialized_sparse_column_offsets();
-//     EXPECT_EQ(offsets[4], offsets[-1]);
-//     EXPECT_EQ(path->size(), value->size());
-//
-//     auto src_contains_seven_subcolumns = 
VariantUtil::construct_varint_column_more_subcolumns();
-//
-//     EXPECT_TRUE(
-//             
src_contains_seven_subcolumns->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-//     EXPECT_EQ(src_contains_seven_subcolumns->size(), 5);
-//
-//     // subcolumn->subcolumn          v.a v.b v.c v.f v.e
-//     // add sprase columns            v.s v.x v.y v.z
-//     dst->insert_range_from(*src_contains_seven_subcolumns, 0, 5);
-//     EXPECT_EQ(dst->size(), 11);
-//
-//     // 5 subcolumn
-//     EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-//     for (int row = 0; row < 6; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     // v.s v.x v.y v.z
-//     for (int row = 6; row < 11; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data0 = path->get_data_at(start);
-//         EXPECT_EQ(data0, StringRef("v.s", 3));
-//         auto pair0 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(convert_field_to_string(pair0.first),
-//                   
convert_field_to_string(VariantUtil::get_field("string")));
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.x", 3));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int16>(), 
std::numeric_limits<Int16>::max());
-//
-//         auto data2 = path->get_data_at(start);
-//         EXPECT_EQ(data2, StringRef("v.y", 3));
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair2.first.get<Int32>(), 
std::numeric_limits<Int32>::max());
-//
-//         auto data3 = path->get_data_at(start);
-//         EXPECT_EQ(data3, StringRef("v.z", 3));
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair3.first.get<Int64>(),
-//                   
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     auto src_contains_subcoumns_and_sparse_columns = 
VariantUtil::construct_basic_varint_column();
-//     EXPECT_TRUE(src_contains_subcoumns_and_sparse_columns
-//                         ->finalize(ColumnObject::FinalizeMode::WRITE_MODE)
-//                         .ok());
-//     EXPECT_EQ(src_contains_subcoumns_and_sparse_columns->size(), 10);
-//
-//     // subcolumn->subcolumn          v.a v.b v.c v.f v.e
-//     // add sprase columns            v.s v.x v.y v.b.d v.c.d v.d.d
-//     dst->insert_range_from(*src_contains_subcoumns_and_sparse_columns, 0, 
10);
-//     EXPECT_EQ(dst->size(), 21);
-//
-//     // 5 subcolumn
-//     EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-//     for (int row = 0; row < 6; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     // v.x v.y
-//     for (int row = 6; row < 11; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data0 = path->get_data_at(start);
-//         EXPECT_EQ(data0, StringRef("v.s", 3));
-//         auto pair0 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(convert_field_to_string(pair0.first),
-//                   
convert_field_to_string(VariantUtil::get_field("string")));
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.x", 3));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int16>(), 
std::numeric_limits<Int16>::max());
-//
-//         auto data2 = path->get_data_at(start);
-//         EXPECT_EQ(data2, StringRef("v.y", 3));
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair2.first.get<Int32>(), 
std::numeric_limits<Int32>::max());
-//
-//         auto data3 = path->get_data_at(start);
-//         EXPECT_EQ(data3, StringRef("v.z", 3));
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair3.first.get<Int64>(),
-//                   
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     for (int row = 11; row < 16; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         EXPECT_EQ(start, end);
-//     }
-//
-//     //v.b.d v.c.d v.d.d
-//     for (int row = 16; row < 21; ++row) {
-//         size_t start = offsets[row - 1];
-//         size_t end = offsets[row];
-//
-//         auto data = path->get_data_at(start);
-//         EXPECT_EQ(data, StringRef("v.b.d", 5));
-//         auto pair = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(pair.first.get<Int64>(), 30);
-//
-//         auto data2 = path->get_data_at(start);
-//         auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data2, StringRef("v.c.d", 5));
-//         EXPECT_EQ(pair2.first.get<Int64>(), 30);
-//
-//         auto data3 = path->get_data_at(start);
-//         auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-//         EXPECT_EQ(data3, StringRef("v.d.d", 5));
-//         EXPECT_EQ(pair3.first.get<String>(), "50");
-//         EXPECT_EQ(start, end);
-//     }
-// }
\ No newline at end of file
+
+std::string convert_field_to_string(doris::vectorized::Field array) { // Returns pretty-printed JSON text of an object with a single "value" member.
+    rapidjson::Document doc;
+    doc.SetObject();
+    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
+    rapidjson::Value json_value; // never assigned below; presumably a JSON null by default - confirm
+    // DataTypeSerDe::convert_field_to_rapidjson(array, json_value, allocator);  // NOTE(review): conversion is disabled, so `array` is never read and the returned text cannot depend on the input
+    doc.AddMember("value", json_value, allocator);
+    rapidjson::StringBuffer buffer;
+    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
+    doc.Accept(writer); // serializes `doc` into `buffer` through the pretty writer
+    return std::string(buffer.GetString());
+}
+
+TEST(ColumnVariantTest, is_null_at) { // Null checks and expected-throw checks on subcolumns "v.f" and "v.b.d".
+    auto v = VariantUtil::construct_dst_varint_column();
+    PathInData path("v.f");
+    auto sub = v->get_subcolumn(path);
+    std::cout << sub->get_least_common_typeBase()->get_name() << std::endl; // diagnostic output only, not asserted
+    EXPECT_TRUE(sub->is_null_at(0));
+
+    auto v1 = VariantUtil::construct_advanced_varint_column();
+    PathInData path1("v.b.d");
+    auto sub1 = v1->get_subcolumn(path1);
+    EXPECT_TRUE(sub1->is_null_at(2));
+    EXPECT_ANY_THROW(sub1->is_null_at(16)); // presumably out of range: other tests in this file expect 15 rows from this builder - confirm
+    vectorized::Field f;
+    EXPECT_ANY_THROW(sub1->get(16, f)); // same index 16, through get()
+    std::cout << sub1->num_rows << std::endl; // diagnostic output only, not asserted
+    EXPECT_NO_THROW(sub1->resize(sub1->num_rows)); // resizing to the current num_rows must not throw
+
+    auto [sparse_column_keys, sparse_column_values] = 
v1->get_sparse_data_paths_and_values();
+    std::string_view pa("v.a");
+    EXPECT_NO_THROW( // last argument 2: expected to succeed
+            sub1->serialize_to_sparse_column(sparse_column_keys, pa, 
sparse_column_values, 2));
+    EXPECT_ANY_THROW( // last argument 16: expected to throw
+            sub1->serialize_to_sparse_column(sparse_column_keys, pa, 
sparse_column_values, 16));
+}
+
+TEST(ColumnVariantTest, advanced_finalize) { // Finalizes the advanced fixture, then checks subcolumn count and per-row sparse offsets.
+    auto variant = VariantUtil::construct_advanced_varint_column();
+
+    // 4. finalize
+    
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok()); // NOTE(review): meaning of the empty `{}` argument is not visible here - confirm
+    EXPECT_EQ(variant->size(), 15);
+
+    // check finalized subcolumn
+    // 5 subcolumn + 1 root
+    EXPECT_EQ(variant->subcolumns.size(), 6);
+    for (const auto& column : variant->subcolumns) {
+        if (column->data.is_root) {
+            continue; // the root entry is not checked
+        }
+        EXPECT_EQ(column->data.data.size(), 1); // every non-root subcolumn must hold exactly one part
+    }
+
+    // check sparse column
+    const auto& offsets = variant->serialized_sparse_column_offsets();
+    for (int row = 0; row < 5; ++row) { // rows 0-4: offset difference 0, i.e. no sparse entries expected
+        EXPECT_EQ(offsets[row] - offsets[row - 1], 0); // NOTE(review): row 0 reads offsets[-1]; assumes the offsets container permits index -1 - confirm
+    }
+    for (int row = 5; row < 15; ++row) { // rows 5-14: offset difference 3 per row
+        EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
+    }
+
+    {
+        // Test fill_path_column_from_sparse_data (called with trailing arguments (0, 5) and (5, 15); nothing is asserted on the outcome)
+        auto map = std::make_unique<NullMap>(15, 0);
+        vectorized::ColumnObject::fill_path_column_from_sparse_data(
+                *variant->get_subcolumn({}) /*root*/, map.get(), StringRef 
{"array"},
+                variant->get_sparse_column(), 0, 5);
+        vectorized::ColumnObject::fill_path_column_from_sparse_data(
+                *variant->get_subcolumn({}) /*root*/, map.get(), StringRef 
{"array"},
+                variant->get_sparse_column(), 5, 15);
+    }
+}
+
+TEST(ColumnVariantTest, advanced_deserialize) { // Walks the sparse column row by row and checks each entry's path and deserialized value.
+    auto variant = VariantUtil::construct_advanced_varint_column();
+
+    // 4. finalize
+    
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(variant->size(), 15);
+
+    const auto& [path, value] = variant->get_sparse_data_paths_and_values();
+    const auto& offsets = variant->serialized_sparse_column_offsets();
+    for (size_t row = 5; row < 10; ++row) { // rows 5-9: expects paths v.b.d, v.c.d, v.d.d in that order
+        size_t start = offsets[row - 1]; // [start, end) is this row's span of sparse entries
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        auto pair = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data, StringRef("v.b.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair.first), // NOTE(review): convert_field_to_string has its conversion call commented out in this file, so this may not compare field contents
+                  convert_field_to_string(get_jsonb_field("array_int")));
+
+        auto data2 = path->get_data_at(start);
+        auto pair2 = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data2, StringRef("v.c.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair2.first),
+                  convert_field_to_string(VariantUtil::get_field("string")));
+
+        auto data3 = path->get_data_at(start);
+        auto pair3 = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data3, StringRef("v.d.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair3.first),
+                  convert_field_to_string(get_jsonb_field("array_int")));
+        EXPECT_EQ(start, end); // exactly three entries were consumed for this row
+    }
+
+    for (size_t row = 10; row < 15; ++row) { // rows 10-14: same three paths, different expected values
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        auto pair = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data, StringRef("v.b.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair.first),
+                  convert_field_to_string(get_jsonb_field("array_str")));
+
+        auto data2 = path->get_data_at(start);
+        auto pair2 = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data2, StringRef("v.c.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair2.first),
+                  convert_field_to_string(get_jsonb_field("int")));
+
+        auto data3 = path->get_data_at(start);
+        auto pair3 = variant->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data3, StringRef("v.d.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair3.first),
+                  convert_field_to_string(get_jsonb_field("array_str")));
+        EXPECT_EQ(start, end); // exactly three entries were consumed for this row
+    }
+}
+
+TEST(ColumnVariantTest, advanced_insert_range_from) { // Inserts all rows of a finalized source into a destination variant and checks subcolumns and sparse entries.
+    auto src = VariantUtil::construct_advanced_varint_column();
+    EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(src->size(), 15);
+
+    auto dst = VariantUtil::construct_dst_varint_column();
+
+    // subcolumn->subcolumn          v.b v.f v.e
+    // subcolumn->sparse_column      v.a v.c
+    // sparse_column->subcolumn      v.b.d v.c.d
+    // sparse_column->sparse_column  v.d.d
+    dst->insert_range_from(*src, 0, src->size());
+    dst->finalize(); // NOTE(review): called without a mode and without checking a result, unlike src above - confirm intended
+    EXPECT_EQ(dst->size(), 15);
+
+    EXPECT_EQ(dst->subcolumns.size(), 6);
+    ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns; // local copy that is sorted by path below
+
+    std::sort(
+            dst_subcolumns.begin(), dst_subcolumns.end(),
+            [](const auto& lhsItem, const auto& rhsItem) { return 
lhsItem->path < rhsItem->path; });
+
+    // subcolumns
+    for (const auto& column : dst_subcolumns) {
+        if (column->data.is_root) {
+            continue; // the root entry is not checked
+        }
+        EXPECT_EQ(column->data.data.size(), 1);
+        EXPECT_EQ(column->data.data[0]->size(), 15);
+
+        if (column->path.get_path().size() == 3) { // path of length 3, e.g. "v.b": all 15 rows expected non-null
+            EXPECT_EQ(column->data.get_non_null_value_size(), 15);
+            if (column->path.get_path() == "v.b") {
+                EXPECT_EQ(assert_cast<const 
DataTypeNullable*>(column->data.data_types[0].get())
+                                  ->get_nested_type()
+                                  ->get_type_id(),
+                          TypeIndex::JSONB);
+            }
+        } else if (column->path.get_path().size() == 5) { // path of length 5 (presumably v.b.d / v.c.d per the mapping above): 10 non-null rows expected
+            EXPECT_EQ(column->data.get_non_null_value_size(), 10);
+            EXPECT_EQ(assert_cast<const 
DataTypeNullable*>(column->data.data_types[0].get())
+                              ->get_nested_type()
+                              ->get_type_id(),
+                      TypeIndex::JSONB);
+            for (size_t row = 0; row < 5; ++row) { // the first five rows are expected to be null
+                EXPECT_TRUE(column->data.data[0]->is_null_at(row));
+            }
+        }
+    }
+
+    // sparse columns
+    const auto& [path, value] = dst->get_sparse_data_paths_and_values();
+    const auto& offsets = dst->serialized_sparse_column_offsets();
+
+    // v.a v.c
+    for (int row = 0; row < 5; ++row) { // rows 0-4: two sparse entries per row
+        size_t start = offsets[row - 1]; // NOTE(review): row 0 reads offsets[-1]; assumes the offsets container permits index -1 - confirm
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        EXPECT_EQ(data, StringRef("v.a", 3));
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+        auto data2 = path->get_data_at(start);
+        EXPECT_EQ(data2, StringRef("v.c", 3));
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(convert_field_to_string(pair2.first), // NOTE(review): convert_field_to_string has its conversion call commented out in this file, so this may not compare field contents
+                  
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+        EXPECT_EQ(start, end); // exactly two entries were consumed for this row
+    }
+
+    for (int row = 5; row < 10; ++row) { // rows 5-9: v.a, v.c, then v.d.d
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data, StringRef("v.a", 3));
+        EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+        auto data2 = path->get_data_at(start);
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data2, StringRef("v.c", 3));
+        EXPECT_EQ(convert_field_to_string(pair2.first),
+                  
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+        auto data3 = path->get_data_at(start);
+        auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data3, StringRef("v.d.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair3.first),
+                  convert_field_to_string(get_jsonb_field("array_int")));
+
+        EXPECT_EQ(start, end); // exactly three entries were consumed for this row
+    }
+
+    for (int row = 10; row < 15; ++row) { // rows 10-14: v.a, v.c, then v.d.d compared against "array_str"
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data, StringRef("v.a", 3));
+        EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+        auto data2 = path->get_data_at(start);
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data2, StringRef("v.c", 3));
+        EXPECT_EQ(convert_field_to_string(pair2.first),
+                  
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+        auto data3 = path->get_data_at(start);
+        auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data3, StringRef("v.d.d", 5));
+        EXPECT_EQ(convert_field_to_string(pair3.first),
+                  convert_field_to_string(get_jsonb_field("array_str")));
+
+        EXPECT_EQ(start, end); // exactly three entries were consumed for this row
+    }
+}
+
+TEST(ColumnVariantTest, empty_inset_range_from) {
+    auto src = VariantUtil::construct_varint_column_only_subcolumns();
+    EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(src->size(), 6);
+
+    // dst is an empty column
+    auto dst = ColumnObject::create(5);
+
+    // subcolumn->subcolumn          v.a v.b v.c v.f v.e
+    dst->insert_range_from(*src, 0, 6);
+    EXPECT_EQ(dst->size(), 6);
+
+    // 5 subcolumn
+    EXPECT_EQ(dst->subcolumns.size(), 6);
+
+    for (const auto& column : dst->subcolumns) {
+        if (column->data.is_root) {
+            EXPECT_EQ(column->data.data.size(), 1);
+            EXPECT_EQ(column->data.data[0]->size(), 6);
+            EXPECT_EQ(column->data.get_non_null_value_size(), 1);
+            continue;
+        }
+        EXPECT_EQ(column->data.data.size(), 1);
+        EXPECT_EQ(column->data.data[0]->size(), 6);
+        EXPECT_EQ(column->data.get_non_null_value_size(), 5);
+    }
+
+    // empty sparse column
+    const auto& [path, value] = dst->get_sparse_data_paths_and_values();
+    const auto& offsets = dst->serialized_sparse_column_offsets();
+    EXPECT_EQ(offsets[4], offsets[-1]);
+    EXPECT_EQ(path->size(), value->size());
+
+    auto src_contains_seven_subcolumns = 
VariantUtil::construct_varint_column_more_subcolumns();
+
+    EXPECT_TRUE(
+            
src_contains_seven_subcolumns->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    
EXPECT_TRUE(src_contains_seven_subcolumns->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(src_contains_seven_subcolumns->size(), 5);
+
+    // subcolumn->subcolumn          v.a v.b v.c v.f v.e
+    // add sparse columns            v.s v.x v.y v.z
+    dst->insert_range_from(*src_contains_seven_subcolumns, 0, 5);
+    EXPECT_EQ(dst->size(), 11);
+
+    // 5 subcolumn
+    EXPECT_EQ(dst->subcolumns.size(), 6);
+
+    for (int row = 0; row < 6; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        EXPECT_EQ(start, end);
+    }
+
+    // v.s v.x v.y v.z
+    for (int row = 6; row < 11; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data0 = path->get_data_at(start);
+        EXPECT_EQ(data0, StringRef("v.s", 3));
+        auto pair0 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(convert_field_to_string(pair0.first),
+                  convert_field_to_string(VariantUtil::get_field("string")));
+
+        auto data = path->get_data_at(start);
+        EXPECT_EQ(data, StringRef("v.x", 3));
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
+
+        auto data2 = path->get_data_at(start);
+        EXPECT_EQ(data2, StringRef("v.y", 3));
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
+
+        auto data3 = path->get_data_at(start);
+        EXPECT_EQ(data3, StringRef("v.z", 3));
+        auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair3.first.get<Int64>(),
+                  Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) 
+ 1));
+
+        EXPECT_EQ(start, end);
+    }
+
+    auto src_contains_subcoumns_and_sparse_columns = 
VariantUtil::construct_basic_varint_column();
+    EXPECT_TRUE(src_contains_subcoumns_and_sparse_columns
+                        ->finalize(ColumnObject::FinalizeMode::WRITE_MODE)
+                        .ok());
+    EXPECT_TRUE(
+            
src_contains_subcoumns_and_sparse_columns->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(src_contains_subcoumns_and_sparse_columns->size(), 10);
+
+    // subcolumn->subcolumn          v.a v.b v.c v.f v.e
+    // add sparse columns            v.s v.x v.y v.b.d v.c.d v.d.d
+    dst->insert_range_from(*src_contains_subcoumns_and_sparse_columns, 0, 10);
+    EXPECT_EQ(dst->size(), 21);
+
+    // 5 subcolumn
+    EXPECT_EQ(dst->subcolumns.size(), 6);
+
+    for (int row = 0; row < 6; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        EXPECT_EQ(start, end);
+    }
+
+    // v.s v.x v.y v.z
+    for (int row = 6; row < 11; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data0 = path->get_data_at(start);
+        EXPECT_EQ(data0, StringRef("v.s", 3));
+        auto pair0 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(convert_field_to_string(pair0.first),
+                  convert_field_to_string(VariantUtil::get_field("string")));
+
+        auto data = path->get_data_at(start);
+        EXPECT_EQ(data, StringRef("v.x", 3));
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
+
+        auto data2 = path->get_data_at(start);
+        EXPECT_EQ(data2, StringRef("v.y", 3));
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
+
+        auto data3 = path->get_data_at(start);
+        EXPECT_EQ(data3, StringRef("v.z", 3));
+        auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair3.first.get<Int64>(),
+                  Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) 
+ 1));
+
+        EXPECT_EQ(start, end);
+    }
+
+    for (int row = 11; row < 16; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        EXPECT_EQ(start, end);
+    }
+
+    //v.b.d v.c.d v.d.d
+    for (int row = 16; row < 21; ++row) {
+        size_t start = offsets[row - 1];
+        size_t end = offsets[row];
+
+        auto data = path->get_data_at(start);
+        EXPECT_EQ(data, StringRef("v.b.d", 5));
+        auto pair = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(pair.first.get<Int64>(), 30);
+
+        auto data2 = path->get_data_at(start);
+        auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data2, StringRef("v.c.d", 5));
+        EXPECT_EQ(pair2.first.get<Int64>(), 30);
+
+        auto data3 = path->get_data_at(start);
+        auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+        EXPECT_EQ(data3, StringRef("v.d.d", 5));
+        EXPECT_EQ(pair3.first.get<String>(), "50");
+        EXPECT_EQ(start, end);
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to