This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 2a3da528c9e [ut](cases)add ut cases for schema utils (#50435)
2a3da528c9e is described below

commit 2a3da528c9ea6e0367fdbada1518e5333f85dd34
Author: amory <wangqian...@selectdb.com>
AuthorDate: Sun Apr 27 10:16:00 2025 +0800

    [ut](cases)add ut cases for schema utils (#50435)
    
    Unit-test cases for schema_util.cpp
    coverage :
    
    amory/doris/be/src/vec/common/schema_util.cpp | 95.08% (58/61) | 88.79% (911/1026) | 85.07% (456/536) | 81.34% (327/402)
    -- | -- | -- | -- | --
    
    
    
    
[amory/doris/be/src/vec/common/schema_util.cpp](https://github.com/apache/doris/pull/50435/coverage/mnt/disk1/wangqiannan/amory/doris/be/src/vec/common/schema_util.cpp.html)
      95.08% (58/61)
      88.79% (911/1026)
      85.07% (456/536)
      81.34% (327/402)
---
 be/src/olap/rowset/segment_v2/segment.h        |   2 +-
 be/src/olap/schema.h                           |  16 +-
 be/test/vec/common/schema_util_rowset_test.cpp | 138 +++-
 be/test/vec/common/schema_util_test.cpp        | 937 +++++++++++++++++++++++++
 4 files changed, 1083 insertions(+), 10 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 0e839bfd465..0c8f73eba88 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -181,7 +181,7 @@ public:
     template <typename Predicate>
     bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema& 
schema,
                                     ReaderType read_type) const {
-        const Field* col = schema.column(cid);
+        const doris::Field* col = schema.column(cid);
         vectorized::DataTypePtr storage_column_type =
                 get_data_type_of(col->get_desc(), read_type != 
ReaderType::READER_QUERY);
         if (storage_column_type == nullptr) {
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index 6414db4153a..2913ceac483 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -117,7 +117,7 @@ public:
         _init(columns, col_ids, num_key_columns);
     }
 
-    Schema(const std::vector<const Field*>& cols, size_t num_key_columns) {
+    Schema(const std::vector<const doris::Field*>& cols, size_t 
num_key_columns) {
         std::vector<ColumnId> col_ids(cols.size());
         _unique_ids.resize(cols.size());
         for (uint32_t cid = 0; cid < cols.size(); ++cid) {
@@ -139,19 +139,19 @@ public:
 
     ~Schema();
 
-    static vectorized::DataTypePtr get_data_type_ptr(const Field& field);
+    static vectorized::DataTypePtr get_data_type_ptr(const doris::Field& 
field);
 
-    static vectorized::IColumn::MutablePtr get_column_by_field(const Field& 
field);
+    static vectorized::IColumn::MutablePtr get_column_by_field(const 
doris::Field& field);
 
     static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const 
FieldType& type,
                                                                     bool 
is_nullable,
                                                                     const 
ReaderType reader_type);
 
-    const std::vector<Field*>& columns() const { return _cols; }
+    const std::vector<doris::Field*>& columns() const { return _cols; }
 
-    const Field* column(ColumnId cid) const { return _cols[cid]; }
+    const doris::Field* column(ColumnId cid) const { return _cols[cid]; }
 
-    Field* mutable_column(ColumnId cid) const { return _cols[cid]; }
+    doris::Field* mutable_column(ColumnId cid) const { return _cols[cid]; }
 
     size_t num_key_columns() const { return _num_key_columns; }
     size_t schema_size() const { return _schema_size; }
@@ -185,7 +185,7 @@ public:
 private:
     void _init(const std::vector<TabletColumnPtr>& cols, const 
std::vector<ColumnId>& col_ids,
                size_t num_key_columns);
-    void _init(const std::vector<const Field*>& cols, const 
std::vector<ColumnId>& col_ids,
+    void _init(const std::vector<const doris::Field*>& cols, const 
std::vector<ColumnId>& col_ids,
                size_t num_key_columns);
 
     void _copy_from(const Schema& other);
@@ -196,7 +196,7 @@ private:
     std::vector<int32_t> _unique_ids;
     // NOTE: Both _cols[cid] and _col_offsets[cid] can only be accessed when 
the cid is
     // contained in _col_ids
-    std::vector<Field*> _cols;
+    std::vector<doris::Field*> _cols;
     // The value of each item indicates the starting offset of the 
corresponding column in
     // current row. e.g. _col_offsets[idx] is the offset of _cols[idx] (idx 
must in _col_ids)
     std::vector<size_t> _col_offsets;
diff --git a/be/test/vec/common/schema_util_rowset_test.cpp 
b/be/test/vec/common/schema_util_rowset_test.cpp
index eb09d77a163..4ea25e53db8 100644
--- a/be/test/vec/common/schema_util_rowset_test.cpp
+++ b/be/test/vec/common/schema_util_rowset_test.cpp
@@ -205,6 +205,92 @@ static RowsetSharedPtr create_rowset(auto& rowset_writer, 
const TabletSchemaSPtr
     return rowset;
 }
 
+TEST_F(SchemaUtilRowsetTest, check_path_stats_agg_key) {
+    // 1.create tablet schema
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(AGG_KEYS);
+    construct_column(schema_pb.add_column(), 0, "INT", "key", true);
+    construct_column(schema_pb.add_column(), 1, "VARIANT", "v1");
+    construct_column(schema_pb.add_column(), 2, "STRING", "v2");
+    construct_column(schema_pb.add_column(), 3, "VARIANT", "v3");
+    construct_column(schema_pb.add_column(), 4, "INT", "v4");
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    tablet_schema->init_from_pb(schema_pb);
+
+    // 2. create tablet
+    TabletMetaSharedPtr tablet_meta(new TabletMeta(tablet_schema));
+    string absolute_dir = _curreent_dir + 
std::string("/ut_dir/schema_util_rows");
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(absolute_dir).ok());
+    std::unique_ptr<DataDir> _data_dir = 
std::make_unique<DataDir>(*_engine_ref, absolute_dir);
+    static_cast<void>(_data_dir->update_capacity());
+    EXPECT_TRUE(_data_dir->init(true).ok());
+
+    TabletSharedPtr _tablet = std::make_shared<Tablet>(*_engine_ref, 
tablet_meta, _data_dir.get());
+    EXPECT_TRUE(_tablet->init().ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    // 3. create rowset
+    std::vector<RowsetSharedPtr> rowsets;
+    for (int i = 0; i < 5; i++) {
+        const auto& res = RowsetFactory::create_rowset_writer(
+                *_engine_ref,
+                rowset_writer_context(_data_dir, tablet_schema, 
_tablet->tablet_path()), false);
+        EXPECT_TRUE(res.has_value()) << res.error();
+        const auto& rowset_writer = res.value();
+        auto rowset = create_rowset(rowset_writer, tablet_schema);
+        EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
+        rowsets.push_back(rowset);
+    }
+
+    // 4. check path stats (rowsets[0] stands in for the output rowset)
+    EXPECT_TRUE(schema_util::check_path_stats(rowsets, rowsets[0], 
_tablet).ok());
+}
+
+TEST_F(SchemaUtilRowsetTest, check_path_stats_agg_delete) {
+    // 1.create tablet schema
+    TabletSchemaPB schema_pb;
+    schema_pb.set_delete_sign_idx(0);
+    construct_column(schema_pb.add_column(), 0, "INT", "key", true);
+    construct_column(schema_pb.add_column(), 1, "VARIANT", "v1");
+    construct_column(schema_pb.add_column(), 2, "STRING", "v2");
+    construct_column(schema_pb.add_column(), 3, "VARIANT", "v3");
+    construct_column(schema_pb.add_column(), 4, "INT", "v4");
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    tablet_schema->init_from_pb(schema_pb);
+
+    // 2. create tablet
+    TabletMetaSharedPtr tablet_meta(new TabletMeta(tablet_schema));
+    string absolute_dir = _curreent_dir + 
std::string("/ut_dir/schema_util_rows1");
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(absolute_dir).ok());
+    std::unique_ptr<DataDir> _data_dir = 
std::make_unique<DataDir>(*_engine_ref, absolute_dir);
+    static_cast<void>(_data_dir->update_capacity());
+    EXPECT_TRUE(_data_dir->init(true).ok());
+
+    TabletSharedPtr _tablet = std::make_shared<Tablet>(*_engine_ref, 
tablet_meta, _data_dir.get());
+    EXPECT_TRUE(_tablet->init().ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    // 3. create rowset
+    std::vector<RowsetSharedPtr> rowsets;
+    for (int i = 0; i < 5; i++) {
+        const auto& res = RowsetFactory::create_rowset_writer(
+                *_engine_ref,
+                rowset_writer_context(_data_dir, tablet_schema, 
_tablet->tablet_path()), false);
+        EXPECT_TRUE(res.has_value()) << res.error();
+        const auto& rowset_writer = res.value();
+        auto rowset = create_rowset(rowset_writer, tablet_schema);
+        EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
+        rowsets.push_back(rowset);
+    }
+
+    // 4. check path stats (rowsets[0] stands in for the output rowset); expected to fail
+    Status st = schema_util::check_path_stats(rowsets, rowsets[0], _tablet);
+    std::cout << st.to_string() << std::endl;
+    EXPECT_FALSE(st.ok());
+}
+
 TEST_F(SchemaUtilRowsetTest, collect_path_stats_and_get_compaction_schema) {
     // 1.create tablet schema
     TabletSchemaPB schema_pb;
@@ -244,7 +330,7 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
 
     for (const auto& [uid, path_stats] : path_stats) {
         for (const auto& [path, size] : path_stats) {
-            EXPECT_EQ(all_path_stats[uid][path], size);
+            EXPECT_EQ(all_path_stats[uid][path], size * 3);
         }
     }
 
@@ -268,6 +354,56 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
         EXPECT_TRUE(paths[2].ends_with("key1"));
         EXPECT_TRUE(paths[3].ends_with("key2"));
     }
+
+    // 6.compaction for output rs
+    // create input rowset reader
+    vector<RowsetReaderSharedPtr> input_rs_readers;
+    for (auto& rowset : rowsets) {
+        RowsetReaderSharedPtr rs_reader;
+        ASSERT_TRUE(rowset->create_reader(&rs_reader).ok());
+        input_rs_readers.push_back(std::move(rs_reader));
+    }
+
+    auto sc = schema_util::calculate_variant_extended_schema(rowsets, 
tablet_schema);
+    std::cout << sc->columns().size() << std::endl;
+
+    // create output rowset writer
+    auto create_rowset_writer_context = [this](TabletSchemaSPtr tablet_schema,
+                                               const SegmentsOverlapPB& 
overlap,
+                                               uint32_t max_rows_per_segment, 
Version version) {
+        static int64_t inc_id = 1000;
+        RowsetWriterContext rowset_writer_context;
+        RowsetId rowset_id;
+        rowset_id.init(inc_id);
+        rowset_writer_context.rowset_id = rowset_id;
+        rowset_writer_context.rowset_type = BETA_ROWSET;
+        rowset_writer_context.rowset_state = VISIBLE;
+        rowset_writer_context.tablet_schema = tablet_schema;
+        rowset_writer_context.tablet_path = _absolute_dir + "/../";
+        rowset_writer_context.version = version;
+        rowset_writer_context.segments_overlap = overlap;
+        rowset_writer_context.max_rows_per_segment = max_rows_per_segment;
+        inc_id++;
+        return rowset_writer_context;
+    };
+    auto writer_context = create_rowset_writer_context(tablet_schema, 
NONOVERLAPPING, 3456,
+                                                       {0, 
rowsets.back()->end_version()});
+    auto res_ = RowsetFactory::create_rowset_writer(*_engine_ref, 
writer_context, true);
+    ASSERT_TRUE(res_.has_value()) << res_.error();
+    auto output_rs_writer = std::move(res_).value();
+    Merger::Statistics stats;
+    RowIdConversion rowid_conversion;
+    stats.rowid_conversion = &rowid_conversion;
+    auto s = Merger::vertical_merge_rowsets(_tablet, 
ReaderType::READER_BASE_COMPACTION,
+                                            *tablet_schema, input_rs_readers,
+                                            output_rs_writer.get(), 100, 5, 
&stats);
+    ASSERT_TRUE(s.ok()) << s;
+    RowsetSharedPtr out_rowset;
+    EXPECT_EQ(Status::OK(), output_rs_writer->build(out_rowset));
+    ASSERT_TRUE(out_rowset);
+
+    // 7. check output rowset
+    EXPECT_TRUE(schema_util::check_path_stats(rowsets, out_rowset, 
_tablet).ok());
 }
 
 TEST_F(SchemaUtilRowsetTest, typed_path) {
diff --git a/be/test/vec/common/schema_util_test.cpp 
b/be/test/vec/common/schema_util_test.cpp
index 3d58863d594..0e8f093e1e6 100644
--- a/be/test/vec/common/schema_util_test.cpp
+++ b/be/test/vec/common/schema_util_test.cpp
@@ -20,7 +20,18 @@
 #include <gmock/gmock-more-matchers.h>
 #include <gtest/gtest.h>
 
+#include "olap/rowset/rowset_fwd.h"
 #include "olap/rowset/segment_v2/variant_column_writer_impl.h"
+#include "testutil/variant_util.h"
+#include "vec/columns/column_nothing.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/schema_util.cpp"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_time_v2.h"
 
 using namespace doris::vectorized;
 
@@ -470,3 +481,929 @@ TEST_F(SchemaUtilTest, generate_sub_column_info_advanced) 
{
     EXPECT_EQ(sub_column_info.column.parent_unique_id(), 10);
     EXPECT_FALSE(sub_column_info.index);
 }
+
+TEST_F(SchemaUtilTest, TestArrayDimensions) {
+    // Test get_number_of_dimensions for DataType
+    auto array_type = 
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
+    auto nested_array_type = std::make_shared<DataTypeArray>(array_type);
+
+    EXPECT_EQ(schema_util::get_number_of_dimensions(*array_type), 1);
+    EXPECT_EQ(schema_util::get_number_of_dimensions(*nested_array_type), 2);
+    
EXPECT_EQ(schema_util::get_number_of_dimensions(*std::make_shared<DataTypeInt32>()),
 0);
+
+    // Test get_number_of_dimensions for Column
+    auto array_column =
+            ColumnArray::create(ColumnInt32::create(), 
ColumnArray::ColumnOffsets::create());
+    auto nested_array_column =
+            ColumnArray::create(array_column->get_ptr(), 
ColumnArray::ColumnOffsets::create());
+
+    EXPECT_EQ(schema_util::get_number_of_dimensions(*array_column), 1);
+    EXPECT_EQ(schema_util::get_number_of_dimensions(*nested_array_column), 2);
+    EXPECT_EQ(schema_util::get_number_of_dimensions(*ColumnInt32::create()), 
0);
+
+    // Test get_base_type_of_array
+    auto base_type = schema_util::get_base_type_of_array(array_type);
+    EXPECT_EQ(base_type->get_type_id(), TypeIndex::Int32);
+
+    base_type = schema_util::get_base_type_of_array(nested_array_type);
+    EXPECT_EQ(base_type->get_type_id(), TypeIndex::Int32);
+
+    // Test create_empty_array_field
+    auto array_field = schema_util::create_empty_array_field(2);
+    EXPECT_EQ(array_field.size(), 1);
+    EXPECT_TRUE(array_field[0].get<Array>().empty());
+}
+
+TEST_F(SchemaUtilTest, TestIntegerConversion) {
+    // Test conversion between integers
+    
EXPECT_FALSE(schema_util::is_conversion_required_between_integers(TypeIndex::Int8,
+                                                                      
TypeIndex::Int16));
+    
EXPECT_FALSE(schema_util::is_conversion_required_between_integers(TypeIndex::Int8,
+                                                                      
TypeIndex::Int32));
+    
EXPECT_FALSE(schema_util::is_conversion_required_between_integers(TypeIndex::Int16,
+                                                                      
TypeIndex::Int32));
+
+    
EXPECT_TRUE(schema_util::is_conversion_required_between_integers(TypeIndex::Int32,
+                                                                     
TypeIndex::Int16));
+    
EXPECT_TRUE(schema_util::is_conversion_required_between_integers(TypeIndex::Int64,
+                                                                     
TypeIndex::Int32));
+
+    
EXPECT_FALSE(schema_util::is_conversion_required_between_integers(TypeIndex::UInt8,
+                                                                      
TypeIndex::UInt16));
+    
EXPECT_TRUE(schema_util::is_conversion_required_between_integers(TypeIndex::UInt32,
+                                                                     
TypeIndex::UInt16));
+}
+
+TEST_F(SchemaUtilTest, TestColumnCasting) {
+    // Test cast_column
+    auto src_type = std::make_shared<DataTypeInt32>();
+    auto dst_type = std::make_shared<DataTypeInt64>();
+
+    auto column = ColumnInt32::create();
+    column->insert(42);
+
+    ColumnWithTypeAndName src_col;
+    src_col.type = src_type;
+    src_col.column = column->get_ptr();
+    src_col.name = "test_col";
+
+    ColumnPtr result;
+    auto status = schema_util::cast_column(src_col, dst_type, &result);
+
+    EXPECT_TRUE(status.ok());
+    EXPECT_EQ(result->get_int(0), 42);
+    EXPECT_EQ(result->get_name(), TypeName<Int64>::get());
+}
+
+TEST_F(SchemaUtilTest, TestGetColumnByType) {
+    // Test get_column_by_type
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto array_type = 
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
+    auto nullable_type = make_nullable(int_type);
+
+    schema_util::ExtraInfo ext_info;
+    ext_info.unique_id = 1;
+    ext_info.parent_unique_id = 2;
+    ext_info.path_info = PathInData("test.path");
+
+    // Test integer type
+    auto int_column = schema_util::get_column_by_type(int_type, "int_col", 
ext_info);
+    EXPECT_EQ(int_column.name(), "int_col");
+    EXPECT_EQ(int_column.type(), FieldType::OLAP_FIELD_TYPE_INT);
+    EXPECT_EQ(int_column.unique_id(), 1);
+    EXPECT_EQ(int_column.parent_unique_id(), 2);
+    EXPECT_EQ(int_column.path_info_ptr()->get_path(), "test.path");
+
+    // Test string type
+    auto string_column = schema_util::get_column_by_type(string_type, 
"string_col", ext_info);
+    EXPECT_EQ(string_column.type(), FieldType::OLAP_FIELD_TYPE_STRING);
+    EXPECT_EQ(string_column.length(), INT_MAX);
+
+    // Test array type
+    auto array_column = schema_util::get_column_by_type(array_type, 
"array_col", ext_info);
+    EXPECT_EQ(array_column.type(), FieldType::OLAP_FIELD_TYPE_ARRAY);
+    EXPECT_EQ(array_column.get_sub_column(0).type(), 
FieldType::OLAP_FIELD_TYPE_INT);
+
+    // Test nullable type
+    auto nullable_column = schema_util::get_column_by_type(nullable_type, 
"nullable_col", ext_info);
+    EXPECT_TRUE(nullable_column.is_nullable());
+    EXPECT_EQ(nullable_column.type(), FieldType::OLAP_FIELD_TYPE_INT);
+}
+
+//TEST_F(SchemaUtilTest, TestGetSortedSubcolumns) {
+//    // Create test subcolumns
+//    vectorized::ColumnObject::Subcolumns subcolumns;
+//
+//    auto create_subcolumn = [](const std::string& path) {
+//        auto subcol = 
std::make_shared<vectorized::ColumnObject::Subcolumn>();
+//        subcol->path = path;
+//        return subcol;
+//    };
+//
+//    subcolumns.push_back(create_subcolumn("c"));
+//    subcolumns.push_back(create_subcolumn("a"));
+//    subcolumns.push_back(create_subcolumn("b"));
+//
+//    auto sorted = schema_util::get_sorted_subcolumns(subcolumns);
+//
+//    EXPECT_EQ(sorted.size(), 3);
+//    EXPECT_EQ(sorted[0]->path, "a");
+//    EXPECT_EQ(sorted[1]->path, "b");
+//    EXPECT_EQ(sorted[2]->path, "c");
+//}
+
+TEST_F(SchemaUtilTest, TestHasSchemaIndexDiff) {
+    TabletSchemaPB schema1_pb;
+    TabletSchemaPB schema2_pb;
+
+    // Setup first schema
+    construct_column(schema1_pb.add_column(), schema1_pb.add_index(), 10000, 
"test_index", 1, "INT",
+                     "test_col", IndexType::INVERTED);
+    auto* col1 = schema1_pb.mutable_column(0);
+    col1->set_is_bf_column(false);
+
+    // Setup second schema with different index
+    construct_column(schema2_pb.add_column(), schema2_pb.add_index(), 10000, 
"test_index", 1, "INT",
+                     "test_col", IndexType::BLOOMFILTER);
+    auto* col2 = schema2_pb.mutable_column(0);
+    col2->set_is_bf_column(true);
+
+    TabletSchemaSPtr schema1 = std::make_shared<TabletSchema>();
+    TabletSchemaSPtr schema2 = std::make_shared<TabletSchema>();
+    schema1->init_from_pb(schema1_pb);
+    schema2->init_from_pb(schema2_pb);
+
+    EXPECT_TRUE(schema_util::has_schema_index_diff(schema1.get(), 
schema2.get(), 0, 0));
+}
+
+TEST_F(SchemaUtilTest, TestParseVariantColumns) {
+    // Create a block with variant column
+    Block block;
+
+    // Create a variant column with JSON string data
+    auto variant_type = std::make_shared<DataTypeObject>(10);
+    auto variant_column = ColumnObject::create(10);
+    auto root_column = ColumnString::create();
+    root_column->insert("{'a': 1, 'b': 'test'}");
+    variant_column->create_root(std::make_shared<DataTypeString>(), 
root_column->get_ptr());
+
+    block.insert({variant_column->get_ptr(), variant_type, "variant_col"});
+
+    std::vector<int> variant_pos {0};
+    ParseConfig config;
+
+    auto status = schema_util::parse_variant_columns(block, variant_pos, 
config);
+    EXPECT_TRUE(status.ok());
+
+    // Check the parsed variant column
+    const auto& result_column = block.get_by_position(0).column;
+    EXPECT_TRUE(result_column->get_name().find("variant") != 
std::string::npos);
+
+    const auto& obj_column = assert_cast<const ColumnObject&>(*result_column);
+    EXPECT_TRUE(obj_column.is_scalar_variant());
+}
+
+TEST_F(SchemaUtilTest, TestGetLeastCommonSchema) {
+    // Create test schemas
+    TabletSchemaPB schema1_pb;
+    schema1_pb.set_keys_type(KeysType::DUP_KEYS);
+    construct_column(schema1_pb.add_column(), schema1_pb.add_index(), 10000, 
"v1_index", 1,
+                     "VARIANT", "v1", IndexType::INVERTED);
+
+    TabletSchemaPB schema2_pb;
+    schema2_pb.set_keys_type(KeysType::DUP_KEYS);
+    construct_column(schema2_pb.add_column(), schema2_pb.add_index(), 10000, 
"v1_index", 1,
+                     "VARIANT", "v1", IndexType::INVERTED);
+
+    TabletSchemaSPtr schema1 = std::make_shared<TabletSchema>();
+    TabletSchemaSPtr schema2 = std::make_shared<TabletSchema>();
+    schema1->init_from_pb(schema1_pb);
+    schema2->init_from_pb(schema2_pb);
+
+    std::vector<TabletSchemaSPtr> schemas {schema1, schema2};
+    TabletSchemaSPtr result_schema;
+
+    auto status = schema_util::get_least_common_schema(schemas, nullptr, 
result_schema);
+    EXPECT_TRUE(status.ok());
+    EXPECT_EQ(result_schema->num_columns(), 1);
+}
+
+TEST_F(SchemaUtilTest, TestGetSizeOfInteger) {
+    // Test all integer types
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::Int8), 
sizeof(int8_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::Int16), 
sizeof(int16_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::Int32), 
sizeof(int32_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::Int64), 
sizeof(int64_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::Int128), 
sizeof(int128_t));
+
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::UInt8), 
sizeof(uint8_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::UInt16), 
sizeof(uint16_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::UInt32), 
sizeof(uint32_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::UInt64), 
sizeof(uint64_t));
+    EXPECT_EQ(schema_util::get_size_of_interger(TypeIndex::UInt128), 
sizeof(uint128_t));
+
+    // Test invalid type
+    //    EXPECT_THROW(schema_util::get_size_of_interger(TypeIndex::String), 
Exception);
+}
+
+TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) {
+    // Test casting from Nothing type
+    auto nothing_type = std::make_shared<DataTypeNothing>();
+    auto dst_type = std::make_shared<DataTypeInt32>();
+
+    auto nothing_column = ColumnNothing::create(1);
+    ColumnWithTypeAndName src_col;
+    src_col.type = nothing_type;
+    src_col.column = nothing_column->get_ptr();
+    src_col.name = "nothing_col";
+
+    ColumnPtr result;
+    auto status = schema_util::cast_column(src_col, dst_type, &result);
+    EXPECT_TRUE(status.ok());
+    EXPECT_EQ(result->size(), 1);
+
+    // Test casting to variant type
+    auto variant_type = std::make_shared<DataTypeObject>(10);
+    auto nullable_array_type =
+            
make_nullable(std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()));
+    auto array_column =
+            ColumnArray::create(ColumnInt32::create(), 
ColumnArray::ColumnOffsets::create());
+    auto nullable_array_column = make_nullable(array_column->get_ptr());
+
+    ColumnWithTypeAndName array_col;
+    array_col.type = nullable_array_type;
+    array_col.column = nullable_array_column;
+    array_col.name = "array_col";
+
+    // Casting the nullable array column to Int32 must return a non-OK status
+    auto int_type = std::make_shared<DataTypeInt32>();
+    Status st = schema_util::cast_column(array_col, int_type, &result);
+    EXPECT_FALSE(st.ok());
+
+    ColumnPtr result1;
+    status = schema_util::cast_column(array_col, variant_type, &result1);
+    EXPECT_TRUE(status.ok());
+    EXPECT_FALSE(result1->is_nullable());
+
+    auto variant_type_nullable = make_nullable(variant_type);
+    status = schema_util::cast_column(array_col, variant_type_nullable, 
&result1);
+    EXPECT_TRUE(status.ok());
+    EXPECT_TRUE(result1->is_nullable());
+
+    // Test casting from variant to variant
+    auto variant_column = ColumnObject::create(10);
+    variant_column->create_root(nullable_array_type, 
nullable_array_column->assume_mutable());
+
+    ColumnWithTypeAndName variant_col;
+    variant_col.type = variant_type;
+    variant_col.column = variant_column->get_ptr();
+    variant_col.name = "variant_col";
+
+    ColumnPtr result2;
+    status = schema_util::cast_column(variant_col, variant_type, &result2);
+    EXPECT_TRUE(status.ok());
+    EXPECT_FALSE(result2->is_nullable());
+}
+
+TEST_F(SchemaUtilTest, TestGetColumnByTypeEdgeCases) {
+    // Test decimal type
+    auto decimal_type = std::make_shared<DataTypeDecimal<Decimal128V2>>(18, 2);
+    schema_util::ExtraInfo ext_info;
+    auto decimal_column = schema_util::get_column_by_type(decimal_type, 
"decimal_col", ext_info);
+    EXPECT_EQ(decimal_column.type(), FieldType::OLAP_FIELD_TYPE_DECIMAL);
+    EXPECT_EQ(decimal_column.precision(), 18);
+    EXPECT_EQ(decimal_column.frac(), 2);
+
+    // Test datetime type
+    auto datetime_type = std::make_shared<DataTypeDateTime>();
+    auto datetime_column = schema_util::get_column_by_type(datetime_type, 
"datetime_col", ext_info);
+    EXPECT_EQ(datetime_column.type(), FieldType::OLAP_FIELD_TYPE_DATETIME);
+
+    // Test datetime v2 type
+    auto datetime_v2_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto datetime_v2_column =
+            schema_util::get_column_by_type(datetime_v2_type, 
"datetime_v2_col", ext_info);
+    EXPECT_EQ(datetime_v2_column.type(), 
FieldType::OLAP_FIELD_TYPE_DATETIMEV2);
+    EXPECT_EQ(datetime_v2_column.precision(), -1);
+    EXPECT_EQ(datetime_v2_column.frac(), 6);
+
+    // Test invalid type
+    auto invalid_type = std::make_shared<DataTypeNothing>();
+    EXPECT_THROW(schema_util::get_column_by_type(invalid_type, "invalid_col", 
ext_info), Exception);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastSchemaInternal) {
+    // Create test schemas and types
+    std::map<PathInData, DataTypes> subcolumns_types;
+    auto schema = std::make_shared<TabletSchema>();
+
+    // Add some test columns
+    TabletColumn base_col;
+    base_col.set_unique_id(1);
+    base_col.set_name("test_variant");
+    base_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    schema->append_column(base_col);
+
+    // Add different types for same path
+    PathInData test_path("test_variant.a");
+    subcolumns_types[test_path] = {std::make_shared<DataTypeInt32>(),
+                                   std::make_shared<DataTypeInt64>()};
+
+    // Add array types with different dimensions
+    PathInData array_path("test_variant.b");
+    subcolumns_types[array_path] = {
+            std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()),
+            std::make_shared<DataTypeArray>(
+                    
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()))};
+
+    // Add path with single type
+    PathInData single_path("test_variant.c");
+    subcolumns_types[single_path] = {std::make_shared<DataTypeString>()};
+
+    std::map<std::string, TabletColumnPtr> typed_columns;
+    schema_util::update_least_schema_internal(subcolumns_types, schema, false, 
1, typed_columns);
+
+    // Check results
+    EXPECT_EQ(schema->num_columns(), 4); // base + 3 subcolumns
+
+    // Check that array path was converted to JSONB due to dimension mismatch
+    int array_col_idx = schema->field_index("test_variant.b");
+    EXPECT_GE(array_col_idx, 0);
+    EXPECT_EQ(schema->column(array_col_idx).type(), 
FieldType::OLAP_FIELD_TYPE_JSONB);
+
+    // Check that mixed integer types were promoted
+    int int_col_idx = schema->field_index("test_variant.a");
+    EXPECT_GE(int_col_idx, 0);
+    EXPECT_EQ(schema->column(int_col_idx).type(), 
FieldType::OLAP_FIELD_TYPE_BIGINT);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastCommonSchema) {
+    // Create test schemas
+    std::vector<TabletSchemaSPtr> schemas;
+    auto schema1 = std::make_shared<TabletSchema>();
+    auto schema2 = std::make_shared<TabletSchema>();
+
+    // Add variant column to both schemas
+    TabletColumn variant_col;
+    variant_col.set_unique_id(1);
+    variant_col.set_name("test_variant");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    schema1->append_column(variant_col);
+    schema2->append_column(variant_col);
+
+    // Add different subcolumns to schemas
+    TabletColumn subcol1;
+    subcol1.set_name("test_variant.a");
+    subcol1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    subcol1.set_parent_unique_id(1);
+    subcol1.set_path_info(PathInData("test_variant.a"));
+    schema1->append_column(subcol1);
+
+    TabletColumn subcol2;
+    subcol2.set_name("test_variant.a");
+    subcol2.set_type(FieldType::OLAP_FIELD_TYPE_BIGINT);
+    subcol2.set_parent_unique_id(1);
+    subcol2.set_path_info(PathInData("test_variant.a"));
+    schema2->append_column(subcol2);
+
+    schemas.push_back(schema1);
+    schemas.push_back(schema2);
+
+    auto result_schema = std::make_shared<TabletSchema>();
+    result_schema->append_column(variant_col);
+
+    std::set<PathInData> path_set;
+    schema_util::update_least_common_schema(schemas, result_schema, 1, 
&path_set);
+
+    // Check results
+    EXPECT_EQ(result_schema->num_columns(), 2); // variant + subcolumn
+    EXPECT_EQ(path_set.size(), 1);
+    EXPECT_TRUE(path_set.find(PathInData("test_variant.a")) != path_set.end());
+
+    // Check that subcolumn type was promoted to BIGINT
+    int subcol_idx = result_schema->field_index("test_variant.a");
+    EXPECT_GE(subcol_idx, 0);
+    EXPECT_EQ(result_schema->column(subcol_idx).type(), 
FieldType::OLAP_FIELD_TYPE_BIGINT);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastCommonSchema2) {
+    // Create common schema with a variant column
+    TabletSchemaSPtr common_schema = std::make_shared<TabletSchema>();
+    TabletColumn variant_col;
+    variant_col.set_unique_id(1);
+    variant_col.set_name("test_variant");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Create subcolumns for variant column in common_schema
+    TabletColumn sub_col1;
+    sub_col1.set_name("test_variant.field1");
+    sub_col1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sub_col1.set_parent_unique_id(1);
+    vectorized::PathInData path1("test_variant.field1");
+    sub_col1.set_path_info(path1);
+    variant_col.add_sub_column(sub_col1);
+
+    TabletColumn sub_col2;
+    sub_col2.set_name("test_variant.field2");
+    sub_col2.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sub_col2.set_parent_unique_id(1);
+    vectorized::PathInData path2("test_variant.field2");
+    sub_col2.set_path_info(path2);
+    variant_col.add_sub_column(sub_col2);
+
+    common_schema->append_column(variant_col);
+
+    // Create schemas vector with two schemas
+    std::vector<TabletSchemaSPtr> schemas;
+    // Schema1: doesn't have the variant column
+    auto schema1 = std::make_shared<TabletSchema>();
+    schemas.push_back(schema1);
+
+    // Schema2: has variant column with different subcolumns
+    auto schema2 = std::make_shared<TabletSchema>();
+    TabletColumn variant_col2;
+    variant_col2.set_unique_id(1);
+    variant_col2.set_name("test_variant");
+    variant_col2.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Add subcolumns to schema2's variant column
+    TabletColumn sub_col3;
+    sub_col3.set_name("test_variant.field3");
+    sub_col3.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sub_col3.set_parent_unique_id(1);
+    vectorized::PathInData path3("test_variant.field3");
+    sub_col3.set_path_info(path3);
+    variant_col2.add_sub_column(sub_col3);
+
+    // Add a subcolumn with same path but different type
+    TabletColumn sub_col1_different_type;
+    sub_col1_different_type.set_name("test_variant.field1");
+    sub_col1_different_type.set_type(FieldType::OLAP_FIELD_TYPE_BIGINT);
+    sub_col1_different_type.set_parent_unique_id(1);
+    sub_col1_different_type.set_path_info(path1);
+    variant_col2.add_sub_column(sub_col1_different_type);
+
+    schema2->append_column(variant_col2);
+    schemas.push_back(schema2);
+
+    // Create path_set that contains some paths
+    std::set<PathInData> path_set;
+    path_set.insert(path1);
+    path_set.insert(path2);
+    path_set.insert(path3);
+
+    // Test update_least_common_schema
+    // This should cover:
+    // 1. schema->field_index(variant_col_unique_id) == -1 branch (via schema1)
+    // 2. The for loop over sparse_columns() (via schema2)
+    // 3. subcolumns_types.find(*col->path_info_ptr()) != 
subcolumns_types.end() branch
+    schema_util::update_least_common_schema(schemas, common_schema, 1, 
&path_set);
+
+    // Verify results
+    const auto& result_variant = common_schema->column_by_uid(1);
+
+    // Only field1 and field2 are expected as subcolumns; field3 must be absent
+    EXPECT_EQ(result_variant.get_sub_columns().size(), 2);
+
+    // field1 must keep its original INT type and field2 its STRING type
+    bool found_field1 = false;
+    bool found_field2 = false;
+    bool found_field3 = false;
+
+    for (const auto& col : result_variant.get_sub_columns()) {
+        if (col->name() == "test_variant.field1") {
+            found_field1 = true;
+            EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_INT);
+        } else if (col->name() == "test_variant.field2") {
+            found_field2 = true;
+            EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_STRING);
+        } else if (col->name() == "test_variant.field3") {
+            EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_INT);
+        }
+    }
+
+    EXPECT_TRUE(found_field1);
+    EXPECT_TRUE(found_field2);
+    EXPECT_FALSE(found_field3);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastCommonSchema3) {
+    // Case: update_least_common_schema when the variant columns carry sparse columns; build the common schema first
+    TabletSchemaSPtr common_schema = std::make_shared<TabletSchema>();
+    TabletColumn variant_col;
+    variant_col.set_unique_id(1);
+    variant_col.set_name("test_variant");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Create sparse columns for variant column in common_schema
+    TabletColumn sparse_col1;
+    sparse_col1.set_name("test_variant.sparse1");
+    sparse_col1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col1.set_parent_unique_id(1);
+    vectorized::PathInData path1("test_variant.sparse1");
+    sparse_col1.set_path_info(path1);
+    variant_col.append_sparse_column(sparse_col1);
+
+    TabletColumn sparse_col2;
+    sparse_col2.set_name("test_variant.sparse2");
+    sparse_col2.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sparse_col2.set_parent_unique_id(1);
+    vectorized::PathInData path2("test_variant.sparse2");
+    sparse_col2.set_path_info(path2);
+    variant_col.append_sparse_column(sparse_col2);
+
+    common_schema->append_column(variant_col);
+
+    // Create schemas vector with two schemas
+    std::vector<TabletSchemaSPtr> schemas;
+
+    // Schema1: doesn't have the variant column
+    auto schema1 = std::make_shared<TabletSchema>();
+    schemas.push_back(schema1);
+
+    // Schema2: has variant column with different sparse columns
+    auto schema2 = std::make_shared<TabletSchema>();
+    TabletColumn variant_col2;
+    variant_col2.set_unique_id(1);
+    variant_col2.set_name("test_variant");
+    variant_col2.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Add sparse columns to schema2's variant column
+    TabletColumn sparse_col3;
+    sparse_col3.set_name("test_variant.sparse3");
+    sparse_col3.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col3.set_parent_unique_id(1);
+    vectorized::PathInData path3("test_variant.sparse3");
+    sparse_col3.set_path_info(path3);
+    variant_col2.append_sparse_column(sparse_col3);
+
+    TabletColumn sparse_col4;
+    sparse_col4.set_name("test_variant.sparse4");
+    sparse_col4.set_type(FieldType::OLAP_FIELD_TYPE_DOUBLE);
+    sparse_col4.set_parent_unique_id(1);
+    vectorized::PathInData path4("test_variant.sparse4");
+    sparse_col4.set_path_info(path4);
+    variant_col2.append_sparse_column(sparse_col4);
+
+    schema2->append_column(variant_col2);
+    schemas.push_back(schema2);
+
+    // Create path_set that contains some but not all sparse column paths
+    std::set<PathInData> path_set;
+    path_set.insert(path1); // from common_schema
+    path_set.insert(path3); // from schema2
+
+    // Test update_least_common_schema (path_set is passed by pointer here)
+    // Intended coverage (NOTE(review): same list as TestUpdateLeastSparseColumn3; confirm it applies here):
+    // 1. schema->field_index(variant_col_unique_id) == -1 branch (via schema1)
+    // 2. The for loop over sparse_columns() (via schema2)
+    // 3. path_set.find(*col->path_info_ptr()) == path_set.end() branch (via 
sparse_col4)
+    schema_util::update_least_common_schema(schemas, common_schema, 1, 
&path_set);
+
+    // Verify the sparse columns left on the common schema: exactly two are expected
+    const auto& result_variant = common_schema->column_by_uid(1);
+    EXPECT_EQ(result_variant.sparse_columns().size(), 2);
+
+    // Check that sparse2 is present and sparse4 is absent (neither of them is in 
path_set)
+    bool found_sparse2 = false;
+    bool found_sparse4 = false;
+    for (const auto& col : result_variant.sparse_columns()) {
+        if (col->name() == "test_variant.sparse2") {
+            found_sparse2 = true;
+        } else if (col->name() == "test_variant.sparse4") {
+            found_sparse4 = true;
+        }
+    }
+    EXPECT_TRUE(found_sparse2);
+    EXPECT_FALSE(found_sparse4);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastSparseColumn) {
+    // Case: update_least_sparse_column on schemas whose variant column has no attached sparse columns
+    std::vector<TabletSchemaSPtr> schemas;
+    auto schema1 = std::make_shared<TabletSchema>();
+    auto schema2 = std::make_shared<TabletSchema>();
+
+    // Add variant column to both schemas
+    TabletColumn variant_col;
+    variant_col.set_unique_id(1);
+    variant_col.set_name("test_variant");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Build sparse columns; below they are appended as top-level schema columns, not via append_sparse_column
+    TabletColumn sparse_col1;
+    sparse_col1.set_name("test_variant.sparse1");
+    sparse_col1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col1.set_parent_unique_id(1);
+    sparse_col1.set_path_info(PathInData("test_variant.sparse1"));
+
+    TabletColumn sparse_col2;
+    sparse_col2.set_name("test_variant.sparse2");
+    sparse_col2.set_type(FieldType::OLAP_FIELD_TYPE_BIGINT);
+    sparse_col2.set_parent_unique_id(1);
+    sparse_col2.set_path_info(PathInData("test_variant.sparse2"));
+
+    schema1->append_column(variant_col);
+    schema1->append_column(sparse_col1);
+    schema2->append_column(variant_col);
+    schema2->append_column(sparse_col2);
+
+    schemas.push_back(schema1);
+    schemas.push_back(schema2);
+
+    auto result_schema = std::make_shared<TabletSchema>();
+    result_schema->append_column(variant_col);
+
+    std::set<PathInData> path_set;
+    path_set.insert(PathInData("test_variant.other_path")); // This path 
should be excluded
+
+    schema_util::update_least_sparse_column(schemas, result_schema, 1, 
path_set);
+
+    // Expect 0. NOTE(review): presumably because no variant column above carries sparse_columns() - confirm
+    EXPECT_EQ(result_schema->column_by_uid(1).sparse_columns().size(), 0);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastSparseColumn2) {
+    // Test case 1 (its call is commented out below): schema doesn't have the variant column; `schema` is otherwise unused
+    TabletSchema schema;
+    TabletColumn variant;
+    variant.set_unique_id(2); // Different ID than what we'll search for
+    schema.append_column(variant);
+
+    std::vector<TabletSchemaSPtr> schemas;
+    auto schema1 = std::make_shared<TabletSchema>();
+    auto schema2 = std::make_shared<TabletSchema>();
+    schemas.push_back(schema1);
+    schemas.push_back(schema2);
+
+    auto result_schema = std::make_shared<TabletSchema>();
+    std::set<PathInData> path_set;
+    path_set.insert(PathInData("test.path"));
+
+    // This should handle the case where field_index returns -1
+    //    schema_util::update_least_sparse_column(schemas, result_schema, 1, 
path_set);
+    //    EXPECT_EQ(result_schema->num_columns(), 0);
+
+    // Test case 2: result schema has the variant column but no sparse columns; both input schemas are empty
+    TabletColumn variant2;
+    variant2.set_unique_id(1);
+    variant2.set_name("test_variant");
+    variant2.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    result_schema->append_column(variant2);
+
+    // This should handle the case where sparse_columns is empty
+    schema_util::update_least_sparse_column(schemas, result_schema, 1, 
path_set);
+    EXPECT_EQ(result_schema->column_by_uid(1).sparse_columns().size(), 0);
+
+    // Test case 3: a sparse column is added to the local variant2 and the call uses an empty 
path_set
+    TabletColumn sparse_col;
+    sparse_col.set_name("test_variant.sparse");
+    sparse_col.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col.set_parent_unique_id(1);
+    sparse_col.set_path_info(PathInData("test_variant.sparse"));
+    variant2.append_sparse_column(sparse_col);
+
+    // NOTE(review): variant2 is modified after append_column; presumably result_schema does not see sparse_col - confirm
+
+    std::set<PathInData> empty_path_set;
+    schema_util::update_least_sparse_column(schemas, result_schema, 1, 
empty_path_set);
+    EXPECT_EQ(result_schema->column_by_uid(1).sparse_columns().size(), 0);
+}
+
+TEST_F(SchemaUtilTest, TestUpdateLeastSparseColumn3) {
+    // Case: update_least_sparse_column with sparse columns on the common schema and on schema2; build the common schema first
+    TabletSchemaSPtr common_schema = std::make_shared<TabletSchema>();
+    TabletColumn variant_col;
+    variant_col.set_unique_id(1);
+    variant_col.set_name("test_variant");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Create sparse columns for variant column in common_schema
+    TabletColumn sparse_col1;
+    sparse_col1.set_name("test_variant.sparse1");
+    sparse_col1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col1.set_parent_unique_id(1);
+    vectorized::PathInData path1("test_variant.sparse1");
+    sparse_col1.set_path_info(path1);
+    variant_col.append_sparse_column(sparse_col1);
+
+    TabletColumn sparse_col2;
+    sparse_col2.set_name("test_variant.sparse2");
+    sparse_col2.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sparse_col2.set_parent_unique_id(1);
+    vectorized::PathInData path2("test_variant.sparse2");
+    sparse_col2.set_path_info(path2);
+    variant_col.append_sparse_column(sparse_col2);
+
+    common_schema->append_column(variant_col);
+
+    // Create schemas vector with two schemas
+    std::vector<TabletSchemaSPtr> schemas;
+
+    // Schema1: doesn't have the variant column
+    auto schema1 = std::make_shared<TabletSchema>();
+    schemas.push_back(schema1);
+
+    // Schema2: has variant column with different sparse columns
+    auto schema2 = std::make_shared<TabletSchema>();
+    TabletColumn variant_col2;
+    variant_col2.set_unique_id(1);
+    variant_col2.set_name("test_variant");
+    variant_col2.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    // Add sparse columns to schema2's variant column
+    TabletColumn sparse_col3;
+    sparse_col3.set_name("test_variant.sparse3");
+    sparse_col3.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    sparse_col3.set_parent_unique_id(1);
+    vectorized::PathInData path3("test_variant.sparse3");
+    sparse_col3.set_path_info(path3);
+    variant_col2.append_sparse_column(sparse_col3);
+
+    TabletColumn sparse_col4;
+    sparse_col4.set_name("test_variant.sparse4");
+    sparse_col4.set_type(FieldType::OLAP_FIELD_TYPE_DOUBLE);
+    sparse_col4.set_parent_unique_id(1);
+    vectorized::PathInData path4("test_variant.sparse4");
+    sparse_col4.set_path_info(path4);
+    variant_col2.append_sparse_column(sparse_col4);
+
+    schema2->append_column(variant_col2);
+    schemas.push_back(schema2);
+
+    // Create path_set that contains some but not all sparse column paths
+    std::set<PathInData> path_set;
+    path_set.insert(path1); // from common_schema
+    path_set.insert(path3); // from schema2
+
+    // Test update_least_sparse_column
+    // This should cover:
+    // 1. schema->field_index(variant_col_unique_id) == -1 branch (via schema1)
+    // 2. The for loop over sparse_columns() (via schema2)
+    // 3. path_set.find(*col->path_info_ptr()) == path_set.end() branch (via 
sparse_col4)
+    schema_util::update_least_sparse_column(schemas, common_schema, 1, 
path_set);
+
+    // Verify the result: three sparse columns are expected (NOTE(review): presumably sparse1, sparse2 and sparse4 - confirm)
+    const auto& result_variant = common_schema->column_by_uid(1);
+    EXPECT_EQ(result_variant.sparse_columns().size(), 3);
+
+    // Check that sparse_col2 and sparse_col4 are present (they weren't in 
path_set)
+    bool found_sparse2 = false;
+    bool found_sparse4 = false;
+    for (const auto& col : result_variant.sparse_columns()) {
+        if (col->name() == "test_variant.sparse2") {
+            found_sparse2 = true;
+        } else if (col->name() == "test_variant.sparse4") {
+            found_sparse4 = true;
+        }
+    }
+    EXPECT_TRUE(found_sparse2);
+    EXPECT_TRUE(found_sparse4);
+}
+
+TEST_F(SchemaUtilTest, TestGetCompactionSchema) {
+    // Case: get_compaction_schema over two rowsets that share one schema and one rowset meta
+    std::vector<RowsetSharedPtr> rowsets;
+    RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
+
+    // Create schema for rowsets
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, 
"v1_index", 1, "VARIANT",
+                     "v1", IndexType::INVERTED);
+
+    auto schema = std::make_shared<TabletSchema>();
+    schema->init_from_pb(schema_pb);
+
+    // Add path statistics (NOTE(review): path_stats is never referenced again in this test)
+    std::unordered_map<int32_t, schema_util::PathToNoneNullValues> path_stats;
+    path_stats[1] = {{"v1.a", 1000}, {"v1.b", 800}, {"v1.c", 500}, {"v1.d", 
300}, {"v1.e", 200}};
+
+    // Build two BetaRowset instances (the stack-allocated variant is left commented out)
+    //    BetaRowset rowset1(schema, rowset_meta, "");
+    //    BetaRowset rowset2(schema, rowset_meta, "");
+    auto rowset1 = std::make_shared<BetaRowset>(schema, rowset_meta, "");
+    auto rowset2 = std::make_shared<BetaRowset>(schema, rowset_meta, "");
+    rowsets.push_back(rowset1);
+    rowsets.push_back(rowset2);
+
+    auto target_schema = std::make_shared<TabletSchema>();
+    target_schema->init_from_pb(schema_pb);
+
+    auto status = schema_util::get_compaction_schema(rowsets, target_schema);
+    EXPECT_TRUE(status.ok());
+
+    // Check that paths were properly distributed between subcolumns and 
sparse columns
+    const auto& variant_col = target_schema->column_by_uid(1);
+    // NOTE(review): both counts are asserted to be 0, i.e. nothing was distributed; original note: "this is not work!!!"
+    EXPECT_EQ(variant_col.get_sub_columns().size(), 0);
+    EXPECT_EQ(variant_col.sparse_columns().size(), 0);
+}
+
+TEST_F(SchemaUtilTest, TestGetSortedSubcolumns) {
+    // Checks the path order returned by get_sorted_subcolumns; the local `subcolumns` is unused, input comes from obj
+    vectorized::ColumnObject::Subcolumns subcolumns;
+    auto obj = VariantUtil::construct_dst_varint_column();
+
+    auto sorted = schema_util::get_sorted_subcolumns(obj->get_subcolumns());
+    std::vector<std::string> expected_paths = {"", "v.b", "v.b.d", "v.c.d", 
"v.e", "v.f"};
+    EXPECT_EQ(sorted.size(), 6);
+    int i = 0;
+    for (auto iter = sorted.begin(); iter != sorted.end(); ++iter) {
+        EXPECT_EQ(iter.operator*()->path.get_path(), expected_paths[i++]);
+    }
+}
+
+TEST_F(SchemaUtilTest, TestCreateSparseColumn) {
+    TabletColumn variant;
+    variant.set_name("test_variant");
+    variant.set_unique_id(42);
+    
variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC);
+
+    auto sparse_column = schema_util::create_sparse_column(variant);
+
+    EXPECT_EQ(sparse_column.name(), "test_variant." + SPARSE_COLUMN_PATH);
+    EXPECT_EQ(sparse_column.type(), FieldType::OLAP_FIELD_TYPE_MAP);
+    EXPECT_EQ(sparse_column.aggregation(), 
FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC);
+    EXPECT_EQ(sparse_column.parent_unique_id(), 42);
+    EXPECT_EQ(sparse_column.path_info_ptr()->get_path(), "test_variant." + 
SPARSE_COLUMN_PATH);
+
+    // Check that both sub columns of the map column (index 0 and index 1) are STRING
+    EXPECT_EQ(sparse_column.get_sub_column(0).type(), 
FieldType::OLAP_FIELD_TYPE_STRING);
+    EXPECT_EQ(sparse_column.get_sub_column(1).type(), 
FieldType::OLAP_FIELD_TYPE_STRING);
+}
+
+TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
+    Block block;
+
+    // Test parsing from string to variant: the variant root is a ColumnString typed as DataTypeString
+    auto variant_type = std::make_shared<DataTypeObject>(10);
+    auto variant_column = ColumnObject::create(10);
+    auto root_column = ColumnString::create();
+
+    // Add JSON-like test data (NOTE(review): single-quoted, not strict JSON - confirm the parser accepts it)
+    root_column->insert("{'a': 1, 'b': 'test'}");
+    root_column->insert("{'a': 2, 'c': [1,2,3]}");
+    root_column->insert("{'a': 3, 'd': {'x': 1}}");
+
+    variant_column->create_root(std::make_shared<DataTypeString>(), 
root_column->get_ptr());
+    block.insert({variant_column->get_ptr(), variant_type, "variant_col"});
+
+    std::vector<int> variant_pos {0};
+    ParseConfig config;
+
+    auto status = schema_util::parse_variant_columns(block, variant_pos, 
config);
+    EXPECT_TRUE(status.ok());
+
+    // Test parsing from JSONB to variant (the root is a ColumnString holding text, typed as DataTypeJsonb)
+    auto jsonb_type = std::make_shared<DataTypeJsonb>();
+    auto jsonb_column = ColumnString::create();
+    jsonb_column->insert("{'x': 1}");
+
+    auto variant_column2 = ColumnObject::create(10);
+    variant_column2->create_root(jsonb_type, jsonb_column->get_ptr());
+
+    Block block2;
+    block2.insert({variant_column2->get_ptr(), variant_type, "variant_col2"});
+
+    status = schema_util::parse_variant_columns(block2, {0}, config);
+    EXPECT_TRUE(status.ok());
+
+    // Test parsing a variant column that was finalized without any data inserted
+    auto variant_column3 = ColumnObject::create(10);
+    variant_column3->finalize();
+
+    Block block3;
+    block3.insert({variant_column3->get_ptr(), variant_type, "variant_col3"});
+
+    status = schema_util::parse_variant_columns(block3, {0}, config);
+    EXPECT_TRUE(status.ok());
+}
+
+TEST_F(SchemaUtilTest, TestParseVariantColumnsWithNulls) {
+    Block block;
+
+    // Create a nullable variant column whose root is a nullable string column; only nullability of the result is asserted
+    auto variant_type = make_nullable(std::make_shared<DataTypeObject>(10));
+    auto string_type = make_nullable(std::make_shared<DataTypeString>());
+
+    auto string_column = ColumnString::create();
+    string_column->insert("{'a': 1}");
+    auto nullable_string = make_nullable(string_column->get_ptr());
+
+    auto variant_column = ColumnObject::create(10);
+    variant_column->create_root(string_type, 
nullable_string->assume_mutable());
+    auto nullable_variant = make_nullable(variant_column->get_ptr());
+
+    block.insert({nullable_variant, variant_type, "nullable_variant"});
+
+    std::vector<int> variant_pos {0};
+    ParseConfig config;
+
+    auto status = schema_util::parse_variant_columns(block, variant_pos, 
config);
+    EXPECT_TRUE(status.ok());
+
+    const auto& result_column = block.get_by_position(0).column;
+    EXPECT_TRUE(result_column->is_nullable());
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to