This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 72726031077 [fix](variant)fix variant subcolumn without finalize in
get behavior (#53418)
72726031077 is described below
commit 727260310778100c17fc7420f12905aa457f02df
Author: amory <[email protected]>
AuthorDate: Fri Jul 18 14:00:41 2025 +0800
[fix](variant)fix variant subcolumn without finalize in get behavior
(#53418)
fix variant subcolumn without finalize in get behavior
---
be/src/vec/columns/column_variant.cpp | 43 +++++----
be/src/vec/columns/column_variant.h | 2 +
be/test/vec/columns/column_variant_test.cpp | 133 ++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 22 deletions(-)
diff --git a/be/src/vec/columns/column_variant.cpp
b/be/src/vec/columns/column_variant.cpp
index f41c066c256..49e9559cdc9 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -890,25 +890,11 @@ void ColumnVariant::Subcolumn::get(size_t n, Field& res)
const {
res = Field();
return;
}
- if (is_finalized()) {
- // TODO(hangyu) : we should use data type to get the field value
- // here is a special case for Array<JsonbField>
- if (least_common_type.get_type_id() == PrimitiveType::TYPE_ARRAY &&
- least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB)
{
- // Array of JsonbField is special case
- get_finalized_column().get(n, res);
- // here we will get a Array<String> Field or NULL, if it is
Array<String>, we need to convert it to Array<JsonbField>
- convert_array_string_to_array_jsonb(res);
- return;
- }
-
- // here is a special case for JsonbField
- if (least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB)
{
- res = Field::create_field<TYPE_JSONB>(JsonbField());
- get_finalized_column().get(n, res);
- return;
- }
+ // JSONB is a special type, it's not a scalar type, we need to handle it
specially
+ // 1. we try to get the JSONB Field from ColumnString which has no JSONB
type info
+ // 2. Array of JSONB is a special type, we get from ColumnArray of
ColumnString, should convert from string Field to JSONB Field
+ if (is_finalized() && least_common_type.get_base_type_id() !=
PrimitiveType::TYPE_JSONB) {
// common type to get the field value
get_finalized_column().get(n, res);
return;
@@ -925,11 +911,20 @@ void ColumnVariant::Subcolumn::get(size_t n, Field& res)
const {
const auto& part = data[i];
const auto& part_type = data_types[i];
if (ind < part->size()) {
- res = vectorized::remove_nullable(part_type)->get_default();
+ auto non_nullable_type = vectorized::remove_nullable(part_type);
+ bool is_nested_array_of_jsonb =
+ non_nullable_type->equals(*NESTED_TYPE_AS_ARRAY_OF_JSONB);
+
+ res = non_nullable_type->get_default();
part->get(ind, res);
- Field new_field;
- convert_field_to_type(res, *least_common_type.get(), &new_field);
- res = new_field;
+
+ if (is_nested_array_of_jsonb) {
+ convert_array_string_to_array_jsonb(res);
+ } else {
+ Field new_field;
+ convert_field_to_type(res, *least_common_type.get(),
&new_field);
+ res = new_field;
+ }
return;
}
@@ -1821,6 +1816,10 @@ const DataTypePtr ColumnVariant::NESTED_TYPE =
std::make_shared<vectorized::Data
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
std::make_shared<vectorized::DataTypeVariant>())));
+const DataTypePtr ColumnVariant::NESTED_TYPE_AS_ARRAY_OF_JSONB =
+
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
+ std::make_shared<vectorized::DataTypeJsonb>()));
+
DataTypePtr ColumnVariant::get_root_type() const {
return subcolumns.get_root()->data.get_least_common_type();
}
diff --git a/be/src/vec/columns/column_variant.h
b/be/src/vec/columns/column_variant.h
index 506d5bd589a..9d7e044f8c9 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -96,6 +96,8 @@ public:
constexpr static PrimitiveType MOST_COMMON_TYPE_ID =
PrimitiveType::TYPE_JSONB;
// Nullable(Array(Nullable(Object)))
const static DataTypePtr NESTED_TYPE;
+ // Array(Nullable(Jsonb))
+ const static DataTypePtr NESTED_TYPE_AS_ARRAY_OF_JSONB;
// Finlize mode for subcolumns, write mode will estimate which subcolumns
are sparse columns(too many null values inside column),
// merge and encode them into a shared column in root column. Only affects
in flush block to segments.
diff --git a/be/test/vec/columns/column_variant_test.cpp
b/be/test/vec/columns/column_variant_test.cpp
index ebedde391c0..f2abd646720 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -25,9 +25,12 @@
#include <memory>
#include "runtime/define_primitive_type.h"
+#include "runtime/jsonb_value.h"
#include "vec/columns/common_column_test.h"
+#include "vec/core/field.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
#include "vec/json/path_in_data.h"
namespace doris::vectorized {
@@ -351,4 +354,134 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
}
}
+TEST_F(ColumnVariantTest, test_nested_array_of_jsonb_get) {
+ // Exercises the subcolumn get() method on "nested.array" before and after finalize()
+
+ // Create the ColumnVariant under test
+ auto variant_column = ColumnVariant::create(true);
+
+ // Add subcolumn with path "nested.array"
+ variant_column->add_sub_column(PathInData("nested.array"), 0);
+
+ // Look up the subcolumn that was just added; it must exist
+ auto* subcolumn =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn, nullptr);
+
+ // Create test data: an (initially empty) Array field
+ Field array_of_strings = Field::create_field<TYPE_ARRAY>(Array());
+
+ // Add two string elements to the array
+ std::string test_data1 = R"("a")";
+ std::string test_data2 = R"(b)";
+
+
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data1));
+
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data2));
+
+ // Insert the array field into the subcolumn as row 0
+ subcolumn->insert(array_of_strings);
+
+ // Test 1: get() while the column still reports finalized after a single insert
+ {
+ EXPECT_TRUE(variant_column->is_finalized());
+ // Read row 0 back through the subcolumn get method
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(0, result));
+
+ // Verify the result is still an array
+ EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 2);
+
+ // Check that all elements are still STRING fields (not JSONB) at this point
+ for (const auto& item : result_array) {
+ EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_STRING);
+ }
+
+ // Verify string content is preserved
+ const auto& string1 = result_array[0].get<const String&>();
+ const auto& string2 = result_array[1].get<const String&>();
+
+ EXPECT_EQ(string1, R"("a")"); // "\"a\""
+ EXPECT_EQ(string2, R"(b)"); // "b"
+ }
+
+ // Test 2: insert a row holding an int array, then re-check the
subcolumn get method
+ {
+ // Build a second row: an array of three ints
+ Field int_array = Field::create_field<TYPE_ARRAY>(Array());
+ int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(1));
+ int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(2));
+ int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(3));
+
+ // Append it to the same subcolumn as row 1
+ subcolumn->insert(int_array);
+
+ EXPECT_FALSE(variant_column->is_finalized());
+ // get() on the not-finalized subcolumn: the int row reads back as an array of JSONB
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(1, result));
+ EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 3);
+ EXPECT_EQ(result_array[0].get_type(), PrimitiveType::TYPE_JSONB);
+ EXPECT_EQ(result_array[1].get_type(), PrimitiveType::TYPE_JSONB);
+ EXPECT_EQ(result_array[2].get_type(), PrimitiveType::TYPE_JSONB);
+
+ // The first (string) row now also reads back as an array of JSONB elements
+ Field result_string;
+ EXPECT_NO_THROW(subcolumn->get(0, result_string));
+ EXPECT_EQ(result_string.get_type(), PrimitiveType::TYPE_ARRAY);
+ const auto& result_string_array = result_string.get<const Array&>();
+ EXPECT_EQ(result_string_array.size(), 2);
+ EXPECT_EQ(result_string_array[0].get_type(),
PrimitiveType::TYPE_JSONB);
+ EXPECT_EQ(result_string_array[1].get_type(),
PrimitiveType::TYPE_JSONB);
+
+ // Finalize the column, then repeat the checks on the finalized data
+ variant_column->finalize();
+ EXPECT_TRUE(variant_column->is_finalized());
+ // Re-fetch the subcolumn pointer from the variant column after finalize()
+ auto* subcolumn_finalized =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn_finalized, nullptr);
+ // Read both rows back through the finalized subcolumn
+ Field result1, result2;
+ EXPECT_NO_THROW(subcolumn_finalized->get(0, result1));
+ EXPECT_NO_THROW(subcolumn_finalized->get(1, result2));
+
+ // Verify both results are arrays
+ EXPECT_EQ(result1.get_type(), PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(result2.get_type(), PrimitiveType::TYPE_ARRAY);
+
+ const auto& array1 = result1.get<const Array&>();
+ const auto& array2 = result2.get<const Array&>();
+
+ EXPECT_EQ(array1.size(), 2);
+ EXPECT_EQ(array2.size(), 3);
+
+ // Verify all elements are JSONB
+ for (const auto& item : array1) {
+ EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
+ }
+ for (const auto& item : array2) {
+ EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
+ }
+ }
+
+ // Test 3: Test with empty array
+ {
+ auto* subcolumn =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn, nullptr);
+ Field empty_array_field = Field::create_field<TYPE_ARRAY>(Array());
+ subcolumn->insert(empty_array_field);
+
+ EXPECT_TRUE(variant_column->is_finalized());
+ // Row 2 (the empty array) must read back as an array with no elements
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(2, result));
+ EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 0);
+ }
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]