This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 72726031077 [fix](variant)fix variant subcolumn without finalize in get behavior (#53418)
72726031077 is described below

commit 727260310778100c17fc7420f12905aa457f02df
Author: amory <[email protected]>
AuthorDate: Fri Jul 18 14:00:41 2025 +0800

    [fix](variant)fix variant subcolumn without finalize in get behavior (#53418)
    
    fix variant subcolumn without finalize in get behavior
---
 be/src/vec/columns/column_variant.cpp       |  43 +++++----
 be/src/vec/columns/column_variant.h         |   2 +
 be/test/vec/columns/column_variant_test.cpp | 133 ++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 22 deletions(-)

diff --git a/be/src/vec/columns/column_variant.cpp b/be/src/vec/columns/column_variant.cpp
index f41c066c256..49e9559cdc9 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -890,25 +890,11 @@ void ColumnVariant::Subcolumn::get(size_t n, Field& res) const {
         res = Field();
         return;
     }
-    if (is_finalized()) {
-        // TODO(hangyu) : we should use data type to get the field value
-        // here is a special case for Array<JsonbField>
-        if (least_common_type.get_type_id() == PrimitiveType::TYPE_ARRAY &&
-            least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB) {
-            // Array of JsonbField is special case
-            get_finalized_column().get(n, res);
-            // here we will get a Array<String> Field or NULL, if it is Array<String>, we need to convert it to Array<JsonbField>
-            convert_array_string_to_array_jsonb(res);
-            return;
-        }
-
-        // here is a special case for JsonbField
-        if (least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB) {
-            res = Field::create_field<TYPE_JSONB>(JsonbField());
-            get_finalized_column().get(n, res);
-            return;
-        }
 
+    // JSONB is a special type, it's not a scalar type, we need to handle it specially
+    // 1. we try to get the JSONB Field from ColumnString which has no JSONB type info
+    // 2. Array of JSONB is a special type, we get from ColumnArray of ColumnString, should convert from string Field to JSONB Field
+    if (is_finalized() && least_common_type.get_base_type_id() != PrimitiveType::TYPE_JSONB) {
         // common type to get the field value
         get_finalized_column().get(n, res);
         return;
@@ -925,11 +911,20 @@ void ColumnVariant::Subcolumn::get(size_t n, Field& res) const {
         const auto& part = data[i];
         const auto& part_type = data_types[i];
         if (ind < part->size()) {
-            res = vectorized::remove_nullable(part_type)->get_default();
+            auto non_nullable_type = vectorized::remove_nullable(part_type);
+            bool is_nested_array_of_jsonb =
+                    non_nullable_type->equals(*NESTED_TYPE_AS_ARRAY_OF_JSONB);
+
+            res = non_nullable_type->get_default();
             part->get(ind, res);
-            Field new_field;
-            convert_field_to_type(res, *least_common_type.get(), &new_field);
-            res = new_field;
+
+            if (is_nested_array_of_jsonb) {
+                convert_array_string_to_array_jsonb(res);
+            } else {
+                Field new_field;
+                convert_field_to_type(res, *least_common_type.get(), &new_field);
+                res = new_field;
+            }
             return;
         }
 
@@ -1821,6 +1816,10 @@ const DataTypePtr ColumnVariant::NESTED_TYPE = std::make_shared<vectorized::Data
         
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
                 std::make_shared<vectorized::DataTypeVariant>())));
 
+const DataTypePtr ColumnVariant::NESTED_TYPE_AS_ARRAY_OF_JSONB =
+        
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
+                std::make_shared<vectorized::DataTypeJsonb>()));
+
 DataTypePtr ColumnVariant::get_root_type() const {
     return subcolumns.get_root()->data.get_least_common_type();
 }
diff --git a/be/src/vec/columns/column_variant.h b/be/src/vec/columns/column_variant.h
index 506d5bd589a..9d7e044f8c9 100644
--- a/be/src/vec/columns/column_variant.h
+++ b/be/src/vec/columns/column_variant.h
@@ -96,6 +96,8 @@ public:
     constexpr static PrimitiveType MOST_COMMON_TYPE_ID = PrimitiveType::TYPE_JSONB;
     // Nullable(Array(Nullable(Object)))
     const static DataTypePtr NESTED_TYPE;
+    // Array(Nullable(Jsonb))
+    const static DataTypePtr NESTED_TYPE_AS_ARRAY_OF_JSONB;
 
     // Finlize mode for subcolumns, write mode will estimate which subcolumns are sparse columns(too many null values inside column),
     // merge and encode them into a shared column in root column. Only affects in flush block to segments.
diff --git a/be/test/vec/columns/column_variant_test.cpp b/be/test/vec/columns/column_variant_test.cpp
index ebedde391c0..f2abd646720 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -25,9 +25,12 @@
 #include <memory>
 
 #include "runtime/define_primitive_type.h"
+#include "runtime/jsonb_value.h"
 #include "vec/columns/common_column_test.h"
+#include "vec/core/field.h"
 #include "vec/data_types/data_type_factory.hpp"
 #include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
 #include "vec/json/path_in_data.h"
 
 namespace doris::vectorized {
@@ -351,4 +354,134 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
     }
 }
 
+TEST_F(ColumnVariantTest, test_nested_array_of_jsonb_get) {
+    // Test case: Create a ColumnVariant with subcolumn type Array<JSONB>
+
+    // Create a ColumnVariant with subcolumns
+    auto variant_column = ColumnVariant::create(true);
+
+    // Add subcolumn with path "nested.array"
+    variant_column->add_sub_column(PathInData("nested.array"), 0);
+
+    // Get the subcolumn and manually set its type to Array<JSONB>
+    auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array"));
+    ASSERT_NE(subcolumn, nullptr);
+
+    // Create test data: Array of strings
+    Field array_of_strings = Field::create_field<TYPE_ARRAY>(Array());
+
+    // Add string elements to the array
+    std::string test_data1 = R"("a")";
+    std::string test_data2 = R"(b)";
+
+    
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data1));
+    
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data2));
+
+    // Insert the array field into the subcolumn
+    subcolumn->insert(array_of_strings);
+
+    // Test 1: get() on the finalized column
+    {
+        EXPECT_TRUE(variant_column->is_finalized());
+        // check the subcolumn get method
+        Field result;
+        EXPECT_NO_THROW(subcolumn->get(0, result));
+
+        // Verify the result is still an array
+        EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+
+        const auto& result_array = result.get<const Array&>();
+        EXPECT_EQ(result_array.size(), 2);
+
+        // Check that all elements are string fields
+        for (const auto& item : result_array) {
+            EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_STRING);
+        }
+
+        // Verify string content is preserved
+        const auto& string1 = result_array[0].get<const String&>();
+        const auto& string2 = result_array[1].get<const String&>();
+
+        EXPECT_EQ(string1, R"("a")"); // "\"a\""
+        EXPECT_EQ(string2, R"(b)");   // "b"
+    }
+
+    // Test 2: Test with a row of different type of array to test the subcolumn get method
+    {
+        // Add another row with different int array
+        Field int_array = Field::create_field<TYPE_ARRAY>(Array());
+        int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(1));
+        int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(2));
+        int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(3));
+
+        // and we should add more data to the subcolumn column
+        subcolumn->insert(int_array);
+
+        EXPECT_FALSE(variant_column->is_finalized());
+        // check the subcolumn get method
+        Field result;
+        EXPECT_NO_THROW(subcolumn->get(1, result));
+        EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+        const auto& result_array = result.get<const Array&>();
+        EXPECT_EQ(result_array.size(), 3);
+        EXPECT_EQ(result_array[0].get_type(), PrimitiveType::TYPE_JSONB);
+        EXPECT_EQ(result_array[1].get_type(), PrimitiveType::TYPE_JSONB);
+        EXPECT_EQ(result_array[2].get_type(), PrimitiveType::TYPE_JSONB);
+
+        // check the first row Field is a string
+        Field result_string;
+        EXPECT_NO_THROW(subcolumn->get(0, result_string));
+        EXPECT_EQ(result_string.get_type(), PrimitiveType::TYPE_ARRAY);
+        const auto& result_string_array = result_string.get<const Array&>();
+        EXPECT_EQ(result_string_array.size(), 2);
+        EXPECT_EQ(result_string_array[0].get_type(), PrimitiveType::TYPE_JSONB);
+        EXPECT_EQ(result_string_array[1].get_type(), PrimitiveType::TYPE_JSONB);
+
+        // Finalize -> we should get the least common type of the subcolumn
+        variant_column->finalize();
+        EXPECT_TRUE(variant_column->is_finalized());
+        // we should get another subcolumn from the variant column
+        auto* subcolumn_finalized = variant_column->get_subcolumn(PathInData("nested.array"));
+        ASSERT_NE(subcolumn_finalized, nullptr);
+        // check the subcolumn_finalized get method
+        Field result1, result2;
+        EXPECT_NO_THROW(subcolumn_finalized->get(0, result1));
+        EXPECT_NO_THROW(subcolumn_finalized->get(1, result2));
+
+        // Verify both results are arrays
+        EXPECT_EQ(result1.get_type(), PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(result2.get_type(), PrimitiveType::TYPE_ARRAY);
+
+        const auto& array1 = result1.get<const Array&>();
+        const auto& array2 = result2.get<const Array&>();
+
+        EXPECT_EQ(array1.size(), 2);
+        EXPECT_EQ(array2.size(), 3);
+
+        // Verify all elements are JSONB
+        for (const auto& item : array1) {
+            EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
+        }
+        for (const auto& item : array2) {
+            EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
+        }
+    }
+
+    // Test 3: Test with empty array
+    {
+        auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array"));
+        ASSERT_NE(subcolumn, nullptr);
+        Field empty_array_field = Field::create_field<TYPE_ARRAY>(Array());
+        subcolumn->insert(empty_array_field);
+
+        EXPECT_TRUE(variant_column->is_finalized());
+        // check the subcolumn get method
+        Field result;
+        EXPECT_NO_THROW(subcolumn->get(2, result));
+        EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
+        const auto& result_array = result.get<const Array&>();
+        EXPECT_EQ(result_array.size(), 0);
+    }
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to