This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch feat-nested in repository https://gitbox.apache.org/repos/asf/doris.git
commit 958ffa4995648edd806c8395de825b8def5111c0 Author: eldenmoon <[email protected]> AuthorDate: Fri Jan 9 20:28:24 2026 +0800 Test NestedGroup segment write/read integration. Update variant writer/reader test to tolerate __ng columns in footer and validate that whole-variant reads merge NestedGroup JSONB back as subcolumns, and that $.a.b can be read as a JSON array. --- .../variant_column_writer_reader_test.cpp | 93 ++++++++++++++++++++-- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp index 70e72ee18ed..da3a3373257 100644 --- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp +++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp @@ -25,6 +25,7 @@ #include "olap/rowset/segment_v2/variant/variant_column_reader.h" #include "olap/rowset/segment_v2/variant/variant_column_writer_impl.h" #include "olap/storage_engine.h" +#include "rapidjson/document.h" #include "testutil/variant_util.h" using namespace doris::vectorized; @@ -360,7 +361,6 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) { st = variant_column_reader->new_iterator(&it, &parent_column, &storage_read_opts, &column_reader_cache); EXPECT_TRUE(st.ok()) << st.msg(); - EXPECT_TRUE(assert_cast<HierarchicalDataIterator*>(it.get()) != nullptr); ColumnIteratorOptions column_iter_opts; column_iter_opts.stats = &stats; column_iter_opts.file_reader = file_reader.get(); @@ -850,15 +850,39 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) { // 6. check footer int expected_sparse_cols = variant_sparse_hash_shard_count > 1 ? variant_sparse_hash_shard_count : 1; - EXPECT_EQ(footer.columns_size(), 1 + 10 + expected_sparse_cols); + // NestedGroup columns (__ng.*) may be appended after sparse columns. + EXPECT_GE(footer.columns_size(), 1 + 10 + expected_sparse_cols); auto column_meta = footer.columns(0); EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT); - for (int i = 1; i < footer.columns_size() - 1; ++i) { - auto column_met = footer.columns(i); - check_column_meta(column_met, path_with_size); + bool has_nested_group = false; + int sparse_meta_cnt = 0; + int subcolumn_meta_cnt = 0; + for (int i = 1; i < footer.columns_size(); ++i) { + const auto& col = footer.columns(i); + if (!col.has_column_path_info()) { + continue; + } + const auto& info = col.column_path_info(); + if (info.has_nested_group_parent_path() || + (info.has_is_nested_group_offsets() && info.is_nested_group_offsets())) { + has_nested_group = true; + continue; + } + auto path = std::make_shared<vectorized::PathInData>(); + path->from_protobuf(info); + const std::string base = path->copy_pop_front().get_path(); + if (base == "__DORIS_VARIANT_SPARSE__" || base.rfind("__DORIS_VARIANT_SPARSE__.b", 0) == 0) { + check_sparse_column_meta(col, path_with_size); + sparse_meta_cnt++; + } else { + check_column_meta(col, path_with_size); + subcolumn_meta_cnt++; + } } - check_sparse_column_meta(footer.columns(footer.columns_size() - 1), path_with_size); + EXPECT_TRUE(has_nested_group); + EXPECT_EQ(sparse_meta_cnt, expected_sparse_cols); + EXPECT_EQ(subcolumn_meta_cnt, 10); // 7. check variant reader io::FileReaderSPtr file_reader; @@ -914,6 +938,63 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) { EXPECT_EQ(value, inserted_jsonstr[i]); } + // Whole Variant read should merge NestedGroup back as JSONB subcolumn "a.b". + { + const auto* cv = assert_cast<ColumnVariant*>(new_column_object.get()); + const auto* ab = cv->get_subcolumn(PathInData("a.b")); + ASSERT_TRUE(ab != nullptr); + ASSERT_TRUE(ab->get_least_common_type() != nullptr); + EXPECT_EQ(remove_nullable(ab->get_least_common_type())->get_type_id(), TypeIndex::JSONB); + } + + // Whole NestedGroup access: $.a.b should return Nullable(Variant(JSONB)) and be a JSON array. + { + TabletColumn ab_col; + ab_col.set_name(parent_column.name_lower_case() + ".a.b"); + ab_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); + ab_col.set_parent_unique_id(parent_column.unique_id()); + ab_col.set_path_info(PathInData(parent_column.name_lower_case() + ".a.b")); + ab_col.set_variant_max_subcolumns_count(parent_column.variant_max_subcolumns_count()); + ab_col.set_is_nullable(true); + + ColumnIteratorUPtr ab_it; + st = variant_column_reader->new_iterator(&ab_it, &ab_col, &storage_read_opts, + &column_reader_cache); + EXPECT_TRUE(st.ok()) << st.msg(); + st = ab_it->init(column_iter_opts); + EXPECT_TRUE(st.ok()) << st.msg(); + size_t ab_rows = 1000; + auto ab_dst = ColumnVariant::create(3); + st = ab_it->seek_to_ordinal(0); + EXPECT_TRUE(st.ok()) << st.msg(); + st = ab_it->next_batch(&ab_rows, ab_dst); + EXPECT_TRUE(st.ok()) << st.msg(); + EXPECT_EQ(ab_rows, 1000); + + for (int i = 0; i < 1000; ++i) { + std::string json; + assert_cast<ColumnVariant*>(ab_dst.get())->serialize_one_row_to_string(i, &json); + rapidjson::Document d; + d.Parse(json.c_str()); + ASSERT_FALSE(d.HasParseError()); + ASSERT_TRUE(d.IsArray()); + ASSERT_EQ(d.Size(), 1); + ASSERT_TRUE(d[0].IsObject()); + ASSERT_TRUE(d[0].HasMember("c")); + ASSERT_TRUE(d[0]["c"].IsObject()); + ASSERT_TRUE(d[0]["c"].HasMember("d")); + ASSERT_TRUE(d[0]["c"].HasMember("e")); + EXPECT_EQ(d[0]["c"]["d"].GetInt(), i); + EXPECT_EQ(std::string(d[0]["c"]["e"].GetString()), std::to_string(i)); + if (i % 17 == 0) { + ASSERT_TRUE(d[0]["c"].HasMember("f")); + EXPECT_EQ(d[0]["c"]["f"].GetInt(), i); + } else { + EXPECT_FALSE(d[0]["c"].HasMember("f")); + } + } + } + auto read_to_column_object = [&](ColumnIteratorUPtr& it) { new_column_object = ColumnVariant::create(10); nrows = 1000; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
