This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new c451863cf88 branch-3.0: [fix](variant)fix variant type conflicts in
nested types (#52696) (#53123)
c451863cf88 is described below
commit c451863cf887fbe745a58f6fd3f3eb58ceecfd47
Author: amory <[email protected]>
AuthorDate: Fri Jul 18 13:58:51 2025 +0800
branch-3.0: [fix](variant)fix variant type conflicts in nested types
(#52696) (#53123)
cherry-pick: https://github.com/apache/doris/pull/52696 and #53418
---
be/src/vec/columns/column_object.cpp | 55 ++-
be/src/vec/columns/column_object.h | 6 +
be/src/vec/data_types/convert_field_to_type.cpp | 11 +-
be/src/vec/functions/function_cast.h | 30 +-
be/src/vec/json/json_parser.cpp | 12 +-
be/src/vec/json/json_parser.h | 2 +
be/test/vec/columns/column_object_test.cpp | 130 +++++
.../function/cast/function_variant_cast_test.cpp | 466 ++++++++++++++++++
be/test/vec/jsonb/convert_field_to_type_test.cpp | 521 +++++++++++++++++++++
be/test/vec/jsonb/json_parser_test.cpp | 172 +++++++
regression-test/data/variant_p0/desc.out | Bin 5887 -> 5908 bytes
regression-test/data/variant_p0/nested2.out | Bin 0 -> 4088 bytes
regression-test/suites/variant_p0/load.groovy | 1 +
regression-test/suites/variant_p0/nested2.groovy | 151 ++++++
14 files changed, 1528 insertions(+), 29 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index f14f0c62deb..196d16dbe58 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -85,12 +85,6 @@ namespace doris::vectorized {
namespace {
DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool
is_nullable) {
- if (type == ColumnObject::MOST_COMMON_TYPE_ID) {
- // JSONB type MUST NOT wrapped in ARRAY column, it should be top level.
- // So we ignored num_dimensions.
- return is_nullable ?
make_nullable(std::make_shared<ColumnObject::MostCommonType>())
- : std::make_shared<ColumnObject::MostCommonType>();
- }
DataTypePtr result = DataTypeFactory::instance().create_data_type(type,
is_nullable);
for (size_t i = 0; i < num_dimensions; ++i) {
result = std::make_shared<DataTypeArray>(result);
@@ -945,11 +939,12 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res)
const {
res = Null();
return;
}
- if (is_finalized()) {
- if (least_common_type.get_base_type_id() == TypeIndex::JSONB) {
- // JsonbFiled is special case
- res = JsonbField();
- }
+
+ // JSONB is a special type rather than a scalar type, so it needs special handling:
+ // 1. A JSONB Field is read from a ColumnString, which carries no JSONB type info.
+ // 2. An Array of JSONB is read from a ColumnArray of ColumnString, so each string Field must be converted to a JSONB Field.
+ if (is_finalized() && least_common_type.get_base_type_id() !=
TypeIndex::JSONB) {
+ // common type to get the field value
get_finalized_column().get(n, res);
return;
}
@@ -965,11 +960,20 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res)
const {
const auto& part = data[i];
const auto& part_type = data_types[i];
if (ind < part->size()) {
- res = vectorized::remove_nullable(part_type)->get_default();
+ auto non_nullable_type = vectorized::remove_nullable(part_type);
+ bool is_nested_array_of_jsonb =
+ non_nullable_type->equals(*NESTED_TYPE_AS_ARRAY_OF_JSONB);
+
+ res = non_nullable_type->get_default();
part->get(ind, res);
- Field new_field;
- convert_field_to_type(res, *least_common_type.get(), &new_field);
- res = new_field;
+
+ if (is_nested_array_of_jsonb) {
+ convert_array_string_to_array_jsonb(res);
+ } else {
+ Field new_field;
+ convert_field_to_type(res, *least_common_type.get(),
&new_field);
+ res = new_field;
+ }
return;
}
@@ -1861,6 +1865,10 @@ const DataTypePtr ColumnObject::NESTED_TYPE =
std::make_shared<vectorized::DataT
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
std::make_shared<vectorized::DataTypeObject>())));
+const DataTypePtr ColumnObject::NESTED_TYPE_AS_ARRAY_OF_JSONB =
+
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
+ std::make_shared<vectorized::DataTypeJsonb>()));
+
DataTypePtr ColumnObject::get_root_type() const {
return subcolumns.get_root()->data.get_least_common_type();
}
@@ -2055,4 +2063,21 @@ bool ColumnObject::try_insert_default_from_nested(const
Subcolumns::NodePtr& ent
return true;
}
+void ColumnObject::Subcolumn::convert_array_string_to_array_jsonb(Field&
array_field) {
+ if (array_field.is_null()) {
+ return;
+ }
+ if (array_field.get_type() != Field::Types::Array) {
+ return;
+ }
+ Field converted_res = Array();
+ for (auto& item : array_field.get<Array&>()) {
+ DCHECK(item.get_type() == Field::Types::String);
+ auto& string_item = item.get<String&>();
+ Field jsonb_item = JsonbField(string_item.c_str(), string_item.size());
+ converted_res.get<Array&>().emplace_back(std::move(jsonb_item));
+ }
+ array_field = std::move(converted_res);
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 16fe3430313..03e14054d2e 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -97,6 +97,9 @@ public:
constexpr static TypeIndex MOST_COMMON_TYPE_ID = TypeIndex::JSONB;
// Nullable(Array(Nullable(Object)))
const static DataTypePtr NESTED_TYPE;
+ // Array(Nullable(Jsonb))
+ const static DataTypePtr NESTED_TYPE_AS_ARRAY_OF_JSONB;
+
// Finalize mode for subcolumns: write mode estimates which subcolumns are sparse columns (too many null values inside the column),
// then merges and encodes them into a shared column in the root column. It only takes effect when flushing a block to segments.
// Otherwise read mode should be used as the default mode.
@@ -177,6 +180,9 @@ public:
void add_new_column_part(DataTypePtr type);
+ /// Converts Array<String> to Array<JsonbField> in place, for special-case handling
+ static void convert_array_string_to_array_jsonb(Field& array_field);
+
friend class ColumnObject;
private:
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp
b/be/src/vec/data_types/convert_field_to_type.cpp
index ecbce03ba6b..9141bb95862 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -33,6 +33,7 @@
#include "common/exception.h"
#include "common/status.h"
#include "util/bitmap_value.h"
+#include "util/jsonb_document.h"
#include "util/jsonb_writer.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
@@ -111,6 +112,11 @@ public:
writer->writeString(x);
writer->writeEndString();
}
+ void operator()(const JsonbField& x, JsonbWriter* writer) const {
+ JsonbDocument* doc;
+ THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(),
x.get_size(), &doc));
+ writer->writeValue(doc->getValue());
+ }
void operator()(const Array& x, JsonbWriter* writer) const;
void operator()(const Tuple& x, JsonbWriter* writer) const {
@@ -146,9 +152,6 @@ public:
void operator()(const Map& x, JsonbWriter* writer) const {
throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not
implemeted");
}
- void operator()(const JsonbField& x, JsonbWriter* writer) const {
- throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not
implemeted");
- }
};
void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer)
const {
@@ -316,4 +319,4 @@ void convert_field_to_type(const Field& from_value, const
IDataType& to_type, Fi
return convert_field_to_typeImpl(from_value, to_type, from_type_hint,
to);
}
}
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 99f5f625971..0e7db6e407b 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -850,7 +850,7 @@ struct ConvertNothingToJsonb {
}
};
-template <TypeIndex type_index, typename ColumnType>
+template <TypeIndex type_index, typename ColumnType, typename ToDataType>
struct ConvertImplFromJsonb {
static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
const size_t result, size_t input_rows_count) {
@@ -897,6 +897,18 @@ struct ConvertImplFromJsonb {
res[i] = 0;
continue;
}
+
+ // If the value is a string, convert it by parsing the string as ToDataType; the result is null when parsing fails.
+ if (value->isString()) {
+ const auto* blob = static_cast<const JsonbBlobVal*>(value);
+ const auto& data = blob->getBlob();
+ size_t len = blob->getBlobLen();
+ ReadBuffer rb((char*)(data), len);
+ bool parsed = try_parse_impl<ToDataType>(res[i], rb,
context);
+ null_map[i] = !parsed;
+ continue;
+ }
+
if constexpr (type_index == TypeIndex::UInt8) {
// cast from json value to boolean type
if (value->isTrue()) {
@@ -1991,22 +2003,22 @@ private:
bool jsonb_string_as_string) const {
switch (to_type->get_type_id()) {
case TypeIndex::UInt8:
- return &ConvertImplFromJsonb<TypeIndex::UInt8,
ColumnUInt8>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::UInt8, ColumnUInt8,
DataTypeUInt8>::execute;
case TypeIndex::Int8:
- return &ConvertImplFromJsonb<TypeIndex::Int8, ColumnInt8>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Int8, ColumnInt8,
DataTypeInt8>::execute;
case TypeIndex::Int16:
- return &ConvertImplFromJsonb<TypeIndex::Int16,
ColumnInt16>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Int16, ColumnInt16,
DataTypeInt16>::execute;
case TypeIndex::Int32:
- return &ConvertImplFromJsonb<TypeIndex::Int32,
ColumnInt32>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Int32, ColumnInt32,
DataTypeInt32>::execute;
case TypeIndex::Int64:
- return &ConvertImplFromJsonb<TypeIndex::Int64,
ColumnInt64>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Int64, ColumnInt64,
DataTypeInt64>::execute;
case TypeIndex::Int128:
- return &ConvertImplFromJsonb<TypeIndex::Int128,
ColumnInt128>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128,
DataTypeInt128>::execute;
case TypeIndex::Float64:
- return &ConvertImplFromJsonb<TypeIndex::Float64,
ColumnFloat64>::execute;
+ return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64,
+ DataTypeFloat64>::execute;
case TypeIndex::String:
if (!jsonb_string_as_string) {
- // Conversion from String through parsing.
return &ConvertImplGenericToString::execute2;
} else {
return ConvertImplGenericFromJsonb::execute;
diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp
index f6e8a65cc08..e031f168820 100644
--- a/be/src/vec/json/json_parser.cpp
+++ b/be/src/vec/json/json_parser.cpp
@@ -59,8 +59,14 @@ void JSONDataParser<ParserImpl>::traverse(const Element&
element, ParseContext&
if (element.isObject()) {
traverseObject(element.getObject(), ctx);
} else if (element.isArray()) {
+ if (ctx.has_nested_in_flatten) {
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Nesting of array in Nested array within
variant subcolumns is "
+ "currently not supported.");
+ }
has_nested = false;
check_has_nested_object(element);
+ ctx.has_nested_in_flatten = has_nested && ctx.enable_flatten_nested;
if (has_nested && !ctx.enable_flatten_nested) {
// Parse nested arrays to JsonbField
JsonbWriter writer;
@@ -71,6 +77,8 @@ void JSONDataParser<ParserImpl>::traverse(const Element&
element, ParseContext&
} else {
traverseArray(element.getArray(), ctx);
}
+ // Reset has_nested_in_flatten once this array has been traversed; otherwise it would still be true for the next array.
+ ctx.has_nested_in_flatten = false;
} else {
ctx.paths.push_back(ctx.builder.get_parts());
ctx.values.push_back(getValueAsField(element));
@@ -137,6 +145,7 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArray(const JSONArray& array,
ParseContext& ctx) {
/// Traverse elements of array and collect an array of fields by each path.
ParseArrayContext array_ctx;
+ array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
array_ctx.total_size = array.size();
for (auto it = array.begin(); it != array.end(); ++it) {
traverseArrayElement(*it, array_ctx);
@@ -162,8 +171,9 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
ParseArrayContext& ctx) {
ParseContext element_ctx;
+ element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
traverse(element, element_ctx);
- auto& [_, paths, values, flatten_nested] = element_ctx;
+ auto& [_, paths, values, flatten_nested, has_nested] = element_ctx;
size_t size = paths.size();
size_t keys_to_update = ctx.arrays_by_path.size();
for (size_t i = 0; i < size; ++i) {
diff --git a/be/src/vec/json/json_parser.h b/be/src/vec/json/json_parser.h
index c1815ae5be4..401f225d000 100644
--- a/be/src/vec/json/json_parser.h
+++ b/be/src/vec/json/json_parser.h
@@ -148,6 +148,7 @@ private:
std::vector<PathInData::Parts> paths;
std::vector<Field> values;
bool enable_flatten_nested = false;
+ bool has_nested_in_flatten = false;
};
using PathPartsWithArray = std::pair<PathInData::Parts, Array>;
using PathToArray = phmap::flat_hash_map<UInt128, PathPartsWithArray,
UInt128TrivialHash>;
@@ -157,6 +158,7 @@ private:
size_t total_size = 0;
PathToArray arrays_by_path;
KeyToSizes nested_sizes_by_key;
+ bool has_nested_in_flatten = false;
};
void traverse(const Element& element, ParseContext& ctx);
void traverseObject(const JSONObject& object, ParseContext& ctx);
diff --git a/be/test/vec/columns/column_object_test.cpp
b/be/test/vec/columns/column_object_test.cpp
index a7498e82e87..21c533e63d6 100644
--- a/be/test/vec/columns/column_object_test.cpp
+++ b/be/test/vec/columns/column_object_test.cpp
@@ -165,4 +165,134 @@ TEST_F(ColumnObjectTest, test_pop_back_multiple_types) {
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nothing");
}
+TEST_F(ColumnObjectTest, test_nested_array_of_jsonb_get) {
+ // Test case: Create a ColumnObject with subcolumn type Array<JSONB>
+
+ // Create a ColumnObject with subcolumns
+ auto variant_column = ColumnObject::create(true);
+
+ // Add subcolumn with path "nested.array"
+ variant_column->add_sub_column(PathInData("nested.array"), 0);
+
+ // Get the subcolumn that was just added; Array data is inserted into it below
+ auto* subcolumn =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn, nullptr);
+
+ // Create test data: Array of strings
+ Field array_of_strings = Array();
+
+ // Add string elements to the array
+ std::string test_data1 = R"("a")";
+ std::string test_data2 = R"(b)";
+
+ array_of_strings.get<Array&>().emplace_back(test_data1);
+ array_of_strings.get<Array&>().emplace_back(test_data2);
+
+ // Insert the array field into the subcolumn
+ subcolumn->insert(array_of_strings);
+
+ // Test 1: check that the column is finalized and test the get method
+ {
+ EXPECT_TRUE(variant_column->is_finalized());
+ // check the subcolumn get method
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(0, result));
+
+ // Verify the result is still an array
+ EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array);
+
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 2);
+
+ // Check that all elements are String fields
+ for (const auto& item : result_array) {
+ EXPECT_EQ(item.get_type(),
doris::vectorized::Field::Types::String);
+ }
+
+ // Verify string content is preserved
+ const auto& string1 = result_array[0].get<const String&>();
+ const auto& string2 = result_array[1].get<const String&>();
+
+ EXPECT_EQ(string1, R"("a")"); // "\"a\""
+ EXPECT_EQ(string2, R"(b)"); // "b"
+ }
+
+ // Test 2: insert a row holding an array of a different element type, then test the subcolumn get method
+ {
+ // Add another row with different int array
+ Field int_array = Array();
+ int_array.get<Array&>().push_back(1);
+ int_array.get<Array&>().push_back(2);
+ int_array.get<Array&>().push_back(3);
+
+ // and we should add more data to the subcolumn column
+ subcolumn->insert(int_array);
+
+ EXPECT_FALSE(variant_column->is_finalized());
+ // check the subcolumn get method
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(1, result));
+ EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array);
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 3);
+ EXPECT_EQ(result_array[0].get_type(),
doris::vectorized::Field::Types::JSONB);
+ EXPECT_EQ(result_array[1].get_type(),
doris::vectorized::Field::Types::JSONB);
+ EXPECT_EQ(result_array[2].get_type(),
doris::vectorized::Field::Types::JSONB);
+
+ // check that the first row is now returned as an array of JSONB fields
+ Field result_string;
+ EXPECT_NO_THROW(subcolumn->get(0, result_string));
+ EXPECT_EQ(result_string.get_type(),
doris::vectorized::Field::Types::Array);
+ const auto& result_string_array = result_string.get<const Array&>();
+ EXPECT_EQ(result_string_array.size(), 2);
+ EXPECT_EQ(result_string_array[0].get_type(),
doris::vectorized::Field::Types::JSONB);
+ EXPECT_EQ(result_string_array[1].get_type(),
doris::vectorized::Field::Types::JSONB);
+
+ // Finalize -> we should get the least common type of the subcolumn
+ variant_column->finalize();
+ EXPECT_TRUE(variant_column->is_finalized());
+ // we should get another subcolumn from the variant column
+ auto* subcolumn_finalized =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn_finalized, nullptr);
+ // check the subcolumn_finalized get method
+ Field result1, result2;
+ EXPECT_NO_THROW(subcolumn_finalized->get(0, result1));
+ EXPECT_NO_THROW(subcolumn_finalized->get(1, result2));
+
+ // Verify both results are arrays
+ EXPECT_EQ(result1.get_type(), doris::vectorized::Field::Types::Array);
+ EXPECT_EQ(result2.get_type(), doris::vectorized::Field::Types::Array);
+
+ const auto& array1 = result1.get<const Array&>();
+ const auto& array2 = result2.get<const Array&>();
+
+ EXPECT_EQ(array1.size(), 2);
+ EXPECT_EQ(array2.size(), 3);
+
+ // Verify all elements are JSONB
+ for (const auto& item : array1) {
+ EXPECT_EQ(item.get_type(), doris::vectorized::Field::Types::JSONB);
+ }
+ for (const auto& item : array2) {
+ EXPECT_EQ(item.get_type(), doris::vectorized::Field::Types::JSONB);
+ }
+ }
+
+ // Test 3: Test with empty array
+ {
+ auto* subcolumn =
variant_column->get_subcolumn(PathInData("nested.array"));
+ ASSERT_NE(subcolumn, nullptr);
+ Field empty_array_field = Array();
+ subcolumn->insert(empty_array_field);
+
+ EXPECT_TRUE(variant_column->is_finalized());
+ // check the subcolumn get method
+ Field result;
+ EXPECT_NO_THROW(subcolumn->get(2, result));
+ EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array);
+ const auto& result_array = result.get<const Array&>();
+ EXPECT_EQ(result_array.size(), 0);
+ }
+}
+
} // namespace doris::vectorized
diff --git a/be/test/vec/function/cast/function_variant_cast_test.cpp
b/be/test/vec/function/cast/function_variant_cast_test.cpp
new file mode 100644
index 00000000000..49a7dc49e15
--- /dev/null
+++ b/be/test/vec/function/cast/function_variant_cast_test.cpp
@@ -0,0 +1,466 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vector>
+
+#include "common/status.h"
+#include "gtest/gtest_pred_impl.h"
+#include "olap/field.h"
+#include "runtime/define_primitive_type.h"
+#include "runtime/primitive_type.h"
+#include "runtime/runtime_state.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_object.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+static doris::vectorized::Field construct_variant_map(
+ const std::vector<std::pair<std::string, doris::vectorized::Field>>&
key_and_values) {
+ doris::vectorized::Field res = VariantMap();
+ auto& object = res.get<VariantMap&>();
+ for (const auto& [k, v] : key_and_values) {
+ PathInData path(k);
+ object.try_emplace(path, v);
+ }
+ return res;
+}
+
+static auto construct_basic_varint_column() {
+ // 1. create an empty variant column
+ auto variant = ColumnObject::create(5);
+
+ std::vector<std::pair<std::string, doris::vectorized::Field>> data;
+
+ // 2. subcolumn path
+ data.emplace_back("v.a", 20);
+ data.emplace_back("v.b", "20");
+ data.emplace_back("v.c", 20);
+ data.emplace_back("v.f", 20);
+ data.emplace_back("v.e", "50");
+ for (int i = 0; i < 5; ++i) {
+ auto field = construct_variant_map(data);
+ variant->try_insert(field);
+ }
+
+ return variant;
+}
+
+TEST(FunctionVariantCast, CastToVariant) {
+ // Test casting from basic types to variant
+ {
+ // Test Int32 to variant
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto int32_col = ColumnInt32::create();
+ int32_col->insert(42);
+ int32_col->insert(100);
+ int32_col->insert(-1);
+
+ ColumnsWithTypeAndName arguments {{int32_col->get_ptr(), int32_type,
"int32_col"},
+ {nullptr, variant_type,
"variant_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, variant_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, variant_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
3).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* variant_col = assert_cast<const
ColumnObject*>(result_col.get());
+ ASSERT_EQ(variant_col->size(), 3);
+ }
+
+ // Test casting from string to variant
+ {
+ auto string_type = std::make_shared<DataTypeString>();
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto string_col = ColumnString::create();
+ string_col->insert_data("hello", 5);
+ string_col->insert_data("world", 5);
+
+ ColumnsWithTypeAndName arguments {{string_col->get_ptr(), string_type,
"string_col"},
+ {nullptr, variant_type,
"variant_type"}};
+
+ auto function = SimpleFunctionFactory::instance().get_function("CAST",
arguments,
+
make_nullable(variant_type));
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, variant_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
2).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* variant_col =
+ assert_cast<const
ColumnObject*>(remove_nullable(result_col).get());
+ ASSERT_EQ(variant_col->size(), 2);
+ }
+
+ // Test casting from array to variant
+ {
+ auto array_type =
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto array_col =
+ ColumnArray::create(ColumnInt32::create(),
ColumnArray::ColumnOffsets::create());
+ auto& data = assert_cast<ColumnInt32&>(array_col->get_data());
+ auto& offsets = array_col->get_offsets();
+
+ data.insert(1);
+ data.insert(2);
+ data.insert(3);
+ offsets.push_back(3);
+
+ ColumnsWithTypeAndName arguments {{array_col->get_ptr(), array_type,
"array_col"},
+ {nullptr, variant_type,
"variant_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, variant_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, variant_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* variant_col =
+ assert_cast<const
ColumnObject*>(remove_nullable(result_col).get());
+ ASSERT_EQ(variant_col->size(), 1);
+ }
+}
+
+TEST(FunctionVariantCast, CastFromVariant) {
+ // Test casting from variant to basic types
+ {
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto variant_col = ColumnObject::create(true);
+
+ // Create a variant column with integer values
+ variant_col->create_root(int32_type, ColumnInt32::create());
+ MutableColumnPtr data = variant_col->get_root();
+ data->insert(42);
+ data->insert(100);
+ data->insert(-1);
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, int32_type, "int32_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, int32_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
3).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ // always nullable
+ const auto* int32_result =
+ assert_cast<const
ColumnInt32*>(remove_nullable(result_col).get());
+ ASSERT_EQ(int32_result->size(), 3);
+ ASSERT_EQ(int32_result->get_element(0), 42);
+ ASSERT_EQ(int32_result->get_element(1), 100);
+ ASSERT_EQ(int32_result->get_element(2), -1);
+ }
+
+ // Test casting from variant to string
+ {
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto string_type = std::make_shared<DataTypeString>();
+ auto variant_col = ColumnObject::create(true);
+
+ // Create a variant column with string values
+ variant_col->create_root(string_type, ColumnString::create());
+ MutableColumnPtr data = variant_col->get_root();
+ data->insert_data("hello", 5);
+ data->insert_data("world", 5);
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, string_type,
"string_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, string_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, string_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
2).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* string_result =
+ assert_cast<const
ColumnString*>(remove_nullable(result_col).get());
+ ASSERT_EQ(string_result->size(), 2);
+ ASSERT_EQ(string_result->get_data_at(0).to_string(), "hello");
+ ASSERT_EQ(string_result->get_data_at(1).to_string(), "world");
+ }
+
+ // Test casting from variant to array
+ {
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto array_type =
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
+ auto variant_col = ColumnObject::create(true);
+
+ // Create a variant column with array values
+ variant_col->create_root(
+ array_type,
+ ColumnArray::create(ColumnInt32::create(),
ColumnArray::ColumnOffsets::create()));
+ MutableColumnPtr data = variant_col->get_root();
+
+ Field a = Array {1, 2, 3};
+
+ data->insert(a);
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, array_type, "array_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, array_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, array_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* array_result =
+ assert_cast<const
ColumnArray*>(remove_nullable(result_col).get());
+ ASSERT_EQ(array_result->size(), 1);
+ const auto& result_data = assert_cast<const
ColumnInt32&>(array_result->get_data());
+ ASSERT_EQ(result_data.size(), 3);
+ ASSERT_EQ(result_data.get_element(0), 1);
+ ASSERT_EQ(result_data.get_element(1), 2);
+ ASSERT_EQ(result_data.get_element(2), 3);
+ }
+}
+
+TEST(FunctionVariantCast, CastVariantWithNull) {
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto nullable_int32_type = std::make_shared<DataTypeNullable>(int32_type);
+
+ // Create a variant column with nullable integer values
+ auto variant_col = ColumnObject::create(true);
+ variant_col->create_root(nullable_int32_type,
+ ColumnNullable::create(ColumnInt32::create(),
ColumnUInt8::create()));
+ MutableColumnPtr data = variant_col->get_root();
+
+ data->insert(42);
+ data->insert(Null());
+ data->insert(100);
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type,
"variant_col"},
+ {nullptr, nullable_int32_type,
"nullable_int32_type"}};
+
+ variant_col->finalize();
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST", arguments,
nullable_int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, nullable_int32_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
3).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* nullable_result = assert_cast<const
ColumnNullable*>(result_col.get());
+ ASSERT_EQ(nullable_result->size(), 3);
+
+ const auto& result_data = assert_cast<const
ColumnInt32&>(nullable_result->get_nested_column());
+ const auto& result_null_map = nullable_result->get_null_map_data();
+
+ ASSERT_EQ(result_data.get_element(0), 42);
+ ASSERT_EQ(result_null_map[0], 0);
+ ASSERT_EQ(result_null_map[1], 1);
+ ASSERT_EQ(result_data.get_element(2), 100);
+}
+
+TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
+ // Test case 1: variant.empty() branch
+ {
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ MutableColumnPtr root = ColumnInt32::create();
+ root->insert(42);
+ vectorized::ColumnObject::Subcolumns dynamic_subcolumns;
+ dynamic_subcolumns.add(
+ vectorized::PathInData(ColumnObject::COLUMN_NAME_DUMMY),
+ vectorized::ColumnObject::Subcolumn {root->get_ptr(),
int32_type, true, true});
+ auto variant_col = ColumnObject::create(std::move(dynamic_subcolumns),
true);
+
+ variant_col->finalize();
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, int32_type, "int32_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, int32_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ // the result is always nullable, so unwrap it before the typed cast
+ const auto* int32_result =
+ assert_cast<const
ColumnInt32*>(remove_nullable(result_col).get());
+ ASSERT_EQ(int32_result->size(), 1);
+ // variant.empty() makes the cast insert a default value of data_type_to
+ ASSERT_EQ(int32_result->get_element(0), 0);
+ }
+
+ // Test case 2: !data_type_to->is_nullable() &&
!WhichDataType(data_type_to).is_string() branch
+ {
+ // object has sparse column
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto variant_col = construct_basic_varint_column();
+ auto variant_type = std::make_shared<DataTypeObject>();
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, int32_type, "int32_type"}};
+
+ variant_col->finalize();
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, int32_type, "result"});
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* nullable_result = assert_cast<const
ColumnNullable*>(result_col.get());
+ ASSERT_EQ(nullable_result->size(), 1);
+ ASSERT_TRUE(nullable_result->is_null_at(0));
+ }
+
+ // Test case 3: WhichDataType(data_type_to).is_string() branch
+ {
+ // variant has sparse column
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto variant_col = construct_basic_varint_column();
+
+ auto string_type = std::make_shared<DataTypeString>();
+ auto variant_type = std::make_shared<DataTypeObject>();
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, string_type,
"string_type"}};
+
+ variant_col->finalize();
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, string_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, string_type, "result"});
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* string_result = assert_cast<const
ColumnString*>(result_col.get());
+ // just call ConvertImplGenericToString which will insert all source
column data to ColumnString
+ ASSERT_EQ(string_result->size(), variant_col->size());
+ ASSERT_EQ(string_result->get_data_at(0).to_string(),
+
"{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}");
+ }
+
+ // Test case 4: else branch (nullable type)
+ {
+ auto variant_col = construct_basic_varint_column();
+ variant_col->finalize();
+ auto nullable_variant_col = make_nullable(variant_col->get_ptr());
+
+ auto nullable_string_type =
make_nullable(std::make_shared<DataTypeString>());
+ auto variant_type = std::make_shared<DataTypeObject>();
+ auto nullable_variant_type = make_nullable(variant_type);
+
+ ColumnsWithTypeAndName arguments {
+ {nullable_variant_col->get_ptr(), nullable_variant_type,
"variant_col"},
+ {nullptr, nullable_string_type, "nullable_string_type"}};
+
+ auto function = SimpleFunctionFactory::instance().get_function("CAST",
arguments,
+
nullable_string_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, nullable_string_type, "result"});
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
1).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+ const auto* nullable_result = assert_cast<const
ColumnNullable*>(result_col.get());
+ ASSERT_EQ(nullable_result->size(), 1);
+ ASSERT_TRUE(nullable_result->is_null_at(1)); // NOTE(review): size() is asserted to be 1 above, so row 1 is past the end - confirm whether row 0 was intended and what it should hold
+ }
+}
+
+} // namespace doris::vectorized
diff --git a/be/test/vec/jsonb/convert_field_to_type_test.cpp
b/be/test/vec/jsonb/convert_field_to_type_test.cpp
new file mode 100644
index 00000000000..065d86c039c
--- /dev/null
+++ b/be/test/vec/jsonb/convert_field_to_type_test.cpp
@@ -0,0 +1,521 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/convert_field_to_type.cpp"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+
+#include "runtime/jsonb_value.h"
+#include "util/jsonb_document.h"
+#include "util/jsonb_writer.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_jsonb.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+class ConvertFieldToTypeTest : public ::testing::Test {
+protected:
+ void SetUp() override {}
+};
+
+// Test FieldVisitorToJsonb with different field types using the same pattern
as convert_field_to_typeImpl
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Null) {
+ JsonbWriter writer;
+
+ // Test null field using Field::dispatch pattern
+ Field null_field;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ null_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's a null value
+ ASSERT_TRUE(doc->getValue()->isNull());
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Int64) {
+ JsonbWriter writer;
+
+ // Test Int64 field using Field::dispatch pattern
+ Int64 test_value = 12345;
+ Field int_field = test_value;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ int_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an integer value
+ ASSERT_TRUE(doc->getValue()->isInt64());
+ ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value);
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_UInt64) {
+ JsonbWriter writer;
+
+ // Test UInt64 field using Field::dispatch pattern
+ UInt64 test_value = 12345;
+ Field uint_field = test_value;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ uint_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an integer value
+ ASSERT_TRUE(doc->getValue()->isInt64());
+ ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(),
static_cast<Int64>(test_value));
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Float64) {
+ JsonbWriter writer;
+
+ // Test Float64 field using Field::dispatch pattern
+ Float64 test_value = 123.456;
+ Field double_field = test_value;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ double_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's a double value
+ ASSERT_TRUE(doc->getValue()->isDouble());
+ ASSERT_DOUBLE_EQ(((const JsonbDoubleVal*)doc->getValue())->val(),
test_value);
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_String) {
+ JsonbWriter writer;
+
+ // Test String field using Field::dispatch pattern
+ Field string_field = "hello world";
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ string_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's a string value
+ ASSERT_TRUE(doc->getValue()->isString());
+ const auto* string_val = static_cast<const JsonbBlobVal*>(doc->getValue());
+ std::string real_string(string_val->getBlob(), string_val->getBlobLen());
+ ASSERT_EQ(real_string, string_field.get<String>());
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_JsonbField) {
+ JsonbWriter writer;
+ JsonBinaryValue jsonb_value;
+ std::string test_data = R"({"a": ["1", "2"]})";
+ THROW_IF_ERROR(jsonb_value.from_json_string(test_data.data(),
test_data.size()));
+ Field jsonb_field_obj = JsonbField(jsonb_value.value(),
jsonb_value.size());
+
+ // Test JsonbField using Field::dispatch pattern
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ jsonb_field_obj);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an object value
+ ASSERT_TRUE(doc->getValue()->isObject());
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Array) {
+ JsonbWriter writer;
+
+ // Create an array with mixed types
+ Array array_field;
+ array_field.push_back(123);
+ array_field.push_back("hello");
+ array_field.push_back(456.789);
+
+ Field array_obj = array_field;
+
+ // Test Array using Field::dispatch pattern
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ array_obj);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an array value
+ ASSERT_TRUE(doc->getValue()->isArray());
+ const ArrayVal& array = static_cast<const ArrayVal&>(*doc->getValue());
+ ASSERT_EQ(array.numElem(), 3);
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_NestedArray) {
+ JsonbWriter writer;
+
+ // Create a nested array
+ Array inner_array;
+ inner_array.push_back(1);
+ inner_array.push_back(2);
+
+ Array outer_array;
+ outer_array.push_back(inner_array);
+ outer_array.push_back("nested");
+
+ Field nested_array_obj = outer_array;
+
+ // Test nested Array using Field::dispatch pattern
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ nested_array_obj);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an array value
+ ASSERT_TRUE(doc->getValue()->isArray());
+ const ArrayVal& array = static_cast<const ArrayVal&>(*doc->getValue());
+ ASSERT_EQ(array.numElem(), 2);
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_LargeInt) {
+ JsonbWriter writer;
+
+ // Test Int128 field using Field::dispatch pattern
+ Int128 test_value = 1234567890123456789;
+ Field largeint_field = test_value;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ largeint_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an int128 value
+ ASSERT_TRUE(doc->getValue()->isInt128());
+ ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value);
+}
+
+TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_UInt128) {
+ JsonbWriter writer;
+
+ // Test UInt128 field using Field::dispatch pattern
+ UInt128 test_value = 1234567890123456789;
+ Field uint128_field = test_value;
+ Field::dispatch([&writer](const auto& value) {
FieldVisitorToJsonb()(value, &writer); },
+ uint128_field);
+
+ auto* output = writer.getOutput();
+ ASSERT_NE(output, nullptr);
+ ASSERT_GT(output->getSize(), 0);
+
+ // Verify the output is valid JSONB
+ JsonbDocument* doc = nullptr;
+ auto status =
+ JsonbDocument::checkAndCreateDocument(output->getBuffer(),
output->getSize(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+
+ // Verify it's an int128 value
+ ASSERT_TRUE(doc->getValue()->isInt128());
+ ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(),
static_cast<Int128>(test_value));
+}
+
+// Test convert_field_to_type function with JSONB type (similar to
convert_field_to_typeImpl)
+TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ToJsonb) {
+ DataTypeJsonb jsonb_type;
+
+ // Test converting Int64 to JSONB
+ {
+ Int64 test_value = 12345;
+ Field int_field = test_value;
+ Field result;
+
+ convert_field_to_type(int_field, jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result.is_null());
+
+ const JsonbField& jsonb_result = result.get<JsonbField>();
+ ASSERT_NE(jsonb_result.get_value(), nullptr);
+ ASSERT_GT(jsonb_result.get_size(), 0);
+
+ // Verify the JSONB content
+ JsonbDocument* doc = nullptr;
+ auto status =
JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(),
+
jsonb_result.get_size(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+ ASSERT_TRUE(doc->getValue()->isInt64());
+ ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value);
+ }
+
+ // Test converting String to JSONB
+ {
+ Field string_field = "hello world";
+ Field result;
+
+ convert_field_to_type(string_field, jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result.is_null());
+
+ const JsonbField& jsonb_result = result.get<JsonbField>();
+ ASSERT_NE(jsonb_result.get_value(), nullptr);
+ ASSERT_GT(jsonb_result.get_size(), 0);
+
+ // Verify the JSONB content
+ JsonbDocument* doc = nullptr;
+ auto status =
JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(),
+
jsonb_result.get_size(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+ ASSERT_TRUE(doc->getValue()->isString());
+ const auto* string_val = static_cast<const
JsonbBlobVal*>(doc->getValue());
+ std::string real_string(string_val->getBlob(),
string_val->getBlobLen());
+ ASSERT_EQ(real_string, string_field.get<String>());
+ }
+
+ // Test converting Array to JSONB
+ {
+ Array array_field;
+ array_field.push_back(1);
+ array_field.push_back("test");
+ array_field.push_back(3.14);
+
+ Field array_obj = array_field;
+ Field result;
+
+ convert_field_to_type(array_obj, jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result.is_null());
+
+ const JsonbField& jsonb_result = result.get<JsonbField>();
+ ASSERT_NE(jsonb_result.get_value(), nullptr);
+ ASSERT_GT(jsonb_result.get_size(), 0);
+
+ // Verify the JSONB content
+ JsonbDocument* doc = nullptr;
+ auto status =
JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(),
+
jsonb_result.get_size(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+ ASSERT_TRUE(doc->getValue()->isArray());
+ const ArrayVal& array = static_cast<const ArrayVal&>(*doc->getValue());
+ ASSERT_EQ(array.numElem(), 3);
+ }
+
+ // Test converting JSONB to JSONB (should be no-op)
+ {
+ JsonbWriter test_writer;
+ test_writer.writeStartObject();
+ test_writer.writeKey("key");
+ test_writer.writeString("value");
+ test_writer.writeEndObject();
+
+ auto* test_output = test_writer.getOutput();
+ JsonbField original_jsonb(test_output->getBuffer(),
test_output->getSize());
+ Field jsonb_field = original_jsonb;
+ Field result;
+
+ convert_field_to_type(jsonb_field, jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result.is_null());
+
+ const JsonbField& jsonb_result = result.get<JsonbField>();
+ ASSERT_NE(jsonb_result.get_value(), nullptr);
+ ASSERT_EQ(jsonb_result.get_size(), original_jsonb.get_size());
+ ASSERT_EQ(memcmp(jsonb_result.get_value(), original_jsonb.get_value(),
+ original_jsonb.get_size()),
+ 0);
+ }
+}
+
+// Test convert_field_to_type with nullable JSONB type
+TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ToNullableJsonb) {
+ auto nullable_jsonb_type =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeJsonb>());
+
+ // Test converting null field
+ {
+ Field null_field;
+ Field result;
+
+ convert_field_to_type(null_field, *nullable_jsonb_type, &result);
+
+ ASSERT_TRUE(result.is_null());
+ }
+
+ // Test converting non-null field
+ {
+ Field string_field = "test string";
+ Field result;
+
+ convert_field_to_type(string_field, *nullable_jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result.is_null());
+
+ const JsonbField& jsonb_result = result.get<JsonbField>();
+ ASSERT_NE(jsonb_result.get_value(), nullptr);
+ ASSERT_GT(jsonb_result.get_size(), 0);
+
+ // Verify the JSONB content
+ JsonbDocument* doc = nullptr;
+ auto status =
JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(),
+
jsonb_result.get_size(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " <<
status.to_string();
+ ASSERT_NE(doc, nullptr);
+ ASSERT_TRUE(doc->getValue()->isString());
+ const auto* string_val = static_cast<const
JsonbBlobVal*>(doc->getValue());
+ std::string real_string(string_val->getBlob(),
string_val->getBlobLen());
+ ASSERT_EQ(real_string, string_field.get<String>());
+ }
+}
+
+// Test convert_field_to_type with array of JSONB
+TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ArrayToJsonb) {
+ auto array_jsonb_type =
std::make_shared<DataTypeArray>(std::make_shared<DataTypeJsonb>());
+
+ // Create an array with mixed types that will be converted to JSONB
+ Array array_field;
+ array_field.push_back(123);
+ array_field.push_back("hello");
+ array_field.push_back(456.789);
+
+ Field array_obj = array_field;
+ Field result;
+
+ convert_field_to_type(array_obj, *array_jsonb_type, &result);
+
+ ASSERT_EQ(result.get_type(), Field::Types::Array);
+ ASSERT_FALSE(result.is_null());
+
+ const Array& result_array = result.get<Array>();
+ ASSERT_EQ(result_array.size(), 3);
+
+ // Verify each element is converted to JSONB
+ for (size_t i = 0; i < result_array.size(); ++i) {
+ ASSERT_EQ(result_array[i].get_type(), Field::Types::JSONB);
+ ASSERT_FALSE(result_array[i].is_null());
+
+ const auto& jsonb_element = result_array[i].get<JsonbField>();
+ ASSERT_NE(jsonb_element.get_value(), nullptr);
+ ASSERT_GT(jsonb_element.get_size(), 0);
+
+ // Verify the JSONB content
+ JsonbDocument* doc = nullptr;
+ auto status =
JsonbDocument::checkAndCreateDocument(jsonb_element.get_value(),
+
jsonb_element.get_size(), &doc);
+ ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument for
element " << i << ": "
+ << status.to_string();
+ ASSERT_NE(doc, nullptr);
+ }
+}
+
+// Test error cases
+TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ErrorCases) {
+ DataTypeJsonb jsonb_type;
+
+ // Test with unsupported types (should throw exception)
+ {
+ Field tuple_field = Tuple();
+
+ EXPECT_THROW(
+ {
+ Field result;
+ convert_field_to_type(tuple_field, jsonb_type, &result);
+ },
+ doris::Exception);
+ }
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/jsonb/json_parser_test.cpp
b/be/test/vec/jsonb/json_parser_test.cpp
new file mode 100644
index 00000000000..924bd13197d
--- /dev/null
+++ b/be/test/vec/jsonb/json_parser_test.cpp
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/json/json_parser.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+
+using doris::vectorized::JSONDataParser;
+using doris::vectorized::SimdJSONParser;
+using doris::vectorized::ParseConfig;
+
+TEST(JsonParserTest, ParseSimpleTypes) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ // int
+ auto result = parser.parse("123", 3, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+
+ // double
+ result = parser.parse("1.23", 4, config);
+ ASSERT_TRUE(result.has_value());
+
+ // bool
+ result = parser.parse("true", 4, config);
+ ASSERT_TRUE(result.has_value());
+
+ // null
+ result = parser.parse("null", 4, config);
+ ASSERT_TRUE(result.has_value());
+
+ // string
+ result = parser.parse("\"abc\"", 5, config);
+ ASSERT_TRUE(result.has_value());
+}
+
+TEST(JsonParserTest, ParseObjectAndArray) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ // Object
+ auto result = parser.parse(R"({"a":1,"b":2})", 13, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 2);
+
+ // Array
+ result = parser.parse("[1,2,3]", 7, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+}
+
+TEST(JsonParserTest, ParseMultiLevelNestedArray) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ auto result = parser.parse("[[1,2],[3,4]]", 13, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::Array);
+
+ result = parser.parse("[[[1],[2]],[[3],[4]]]", 21, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::Array);
+
+ result = parser.parse("[[1,2],[3],[4,5,6]]", 19, config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+
+ // Test complex nested structure
+ config.enable_flatten_nested = false;
+ std::string json1 = R"({"a":[[1,2],[3],[4,5,6]]})";
+ // multi level nested array in object
+ result = parser.parse(json1.c_str(), json1.size(), config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::Array);
+
+ std::string json = R"({"nested": [{"a": [1,2,3]}]})";
+ // the single parsed value should be a JSONB field
+ result = parser.parse(json.c_str(), json.size(), config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::JSONB);
+
+ // multi level nested array in nested array object
+ std::string json2 = R"({"a":[{"b":[[1,2,3]]}]})";
+ result = parser.parse(json2.c_str(), json2.size(), config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::JSONB);
+
+ // test flatten nested
+ config.enable_flatten_nested = true;
+ EXPECT_ANY_THROW(parser.parse(json.c_str(), json.size(), config));
+ // test flatten nested with multi level nested array
+ // does not throw because json1 is not an array of nested objects
+ result = parser.parse(json1.c_str(), json1.size(), config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(result->values.size(), 1);
+ EXPECT_EQ(result->paths.size(), 1);
+ EXPECT_EQ(result->values[0].get_type(),
doris::vectorized::Field::Types::Array);
+
+ EXPECT_ANY_THROW(parser.parse(json2.c_str(), json2.size(), config));
+}
+
+TEST(JsonParserTest, ParseNestedAndFlatten) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+ config.enable_flatten_nested = true;
+
+ std::string json = R"({"a":[{"b":1},{"b":2}]})";
+ auto result = parser.parse(json.c_str(), json.size(), config);
+ ASSERT_TRUE(result.has_value());
+ EXPECT_GT(result->values.size(), 0);
+
+ config.enable_flatten_nested = false;
+ std::string json2 = R"({"a":[{"b":1},{"b":2}]})";
+ result = parser.parse(json2.c_str(), json2.size(), config);
+ ASSERT_TRUE(result.has_value());
+}
+
+TEST(JsonParserTest, ParseInvalidJson) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ auto result = parser.parse("{a:1}", 5, config);
+ ASSERT_FALSE(result.has_value());
+
+ result = parser.parse("", 0, config);
+ ASSERT_FALSE(result.has_value());
+}
+
+TEST(JsonParserTest, ParseCornerCases) {
+ JSONDataParser<SimdJSONParser> parser;
+ ParseConfig config;
+
+ auto result = parser.parse("{}", 2, config);
+ ASSERT_TRUE(result.has_value());
+
+ result = parser.parse("[]", 2, config);
+ ASSERT_TRUE(result.has_value());
+
+ result = parser.parse(R"({"a":"\n\t"})", 12, config);
+ ASSERT_TRUE(result.has_value());
+}
diff --git a/regression-test/data/variant_p0/desc.out
b/regression-test/data/variant_p0/desc.out
index 1eff52e4484..71f804cc25c 100644
Binary files a/regression-test/data/variant_p0/desc.out and
b/regression-test/data/variant_p0/desc.out differ
diff --git a/regression-test/data/variant_p0/nested2.out
b/regression-test/data/variant_p0/nested2.out
new file mode 100644
index 00000000000..c7790a107de
Binary files /dev/null and b/regression-test/data/variant_p0/nested2.out differ
diff --git a/regression-test/suites/variant_p0/load.groovy
b/regression-test/suites/variant_p0/load.groovy
index 8ec837ff18b..8d87175d1d3 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -14,6 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+import org.awaitility.Awaitility
suite("regression_test_variant", "p0"){
diff --git a/regression-test/suites/variant_p0/nested2.groovy
b/regression-test/suites/variant_p0/nested2.groovy
new file mode 100644
index 00000000000..8d48fcfce9b
--- /dev/null
+++ b/regression-test/suites/variant_p0/nested2.groovy
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This suite tests type conflicts between subcolumns of a nested array in a variant column.
+suite("variant_nested_type_conflict", "p0"){
+ // Inserts rows whose nested subcolumns a, b, c disagree on type, then runs desc, selects and array casts before and after a full compaction.
+ try {
+
+ def table_name = "var_nested_type_conflict"
+ sql "DROP TABLE IF EXISTS ${table_name}"
+ sql """set describe_extend_variant_column = true"""
+ // Single-bucket duplicate-key table with variant_enable_flatten_nested turned on.
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name} (
+ k bigint,
+ v variant
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really
compaction in conflict case
+ properties("replication_num" = "1", "disable_auto_compaction"
= "false", "variant_enable_flatten_nested" = "true");
+ """
+ def sql_select_batch = { // Selects whole rows, each of v['nested']['a'|'b'|'c'], and v['nested'], all ordered by k.
+ qt_sql_0 """select * from ${table_name} order by k"""
+
+ qt_sql_1 """select v['nested']['a'] from ${table_name} order by
k"""
+ qt_sql_2 """select v['nested']['b'] from ${table_name} order by
k"""
+ qt_sql_3 """select v['nested']['c'] from ${table_name} order by
k"""
+
+ qt_sql_4 """select v['nested'] from ${table_name} order by k"""
+ }
+
+ def sql_test_cast_to_array = { // Casts each nested subcolumn to array<int>, array<string> and array<double>, also selecting the size of each cast result.
+ // test cast to array<int>
+ qt_sql_8 """select cast(v['nested']['a'] as array<int>),
size(cast(v['nested']['a'] as array<int>)) from ${table_name} order by k"""
+ qt_sql_9 """select cast(v['nested']['b'] as array<int>),
size(cast(v['nested']['b'] as array<int>)) from ${table_name} order by k"""
+ qt_sql_10 """select cast(v['nested']['c'] as array<int>),
size(cast(v['nested']['c'] as array<int>)) from ${table_name} order by k"""
+
+ // test cast to array<string>
+ qt_sql_11 """select cast(v['nested']['a'] as array<string>),
size(cast(v['nested']['a'] as array<string>)) from ${table_name} order by k"""
+ qt_sql_12 """select cast(v['nested']['b'] as array<string>),
size(cast(v['nested']['b'] as array<string>)) from ${table_name} order by k"""
+ qt_sql_13 """select cast(v['nested']['c'] as array<string>),
size(cast(v['nested']['c'] as array<string>)) from ${table_name} order by k"""
+
+ // test cast to array<double>
+ qt_sql_14 """select cast(v['nested']['a'] as array<double>),
size(cast(v['nested']['a'] as array<double>)) from ${table_name} order by k"""
+ qt_sql_15 """select cast(v['nested']['b'] as array<double>),
size(cast(v['nested']['b'] as array<double>)) from ${table_name} order by k"""
+ qt_sql_16 """select cast(v['nested']['c'] as array<double>),
size(cast(v['nested']['c'] as array<double>)) from ${table_name} order by k"""
+
+ }
+ // inserting an array inside a Nested array is not supported; the insert below is expected to fail with this exception
+ test {
+ sql """
+ insert into ${table_name} values (1, '{"nested": [{"a":
[1,2,3]}]}');
+ """
+ exception "Nesting of array in Nested array within variant
subcolumns is currently not supported."
+ }
+ /// insert an array of objects that together cover a, b and c
+ // type conflict spread across multiple rows
+ sql """
+ insert into ${table_name} values (1, '{"nested": [{"a": 1, "c":
1.1}, {"b": "1"}]}');
+ """
+
+ // for cloud we should select first and then desc for syncing rowset
to get latest schema
+ sql """
+ select * from ${table_name} order by k limit 1;
+ """
+ qt_sql_desc_1 """
+ desc ${table_name};
+ """
+ // now select for a, b, c
+ sql_select_batch()
+ sql_test_cast_to_array()
+ /// insert a second row in which a and b are doubles
+ sql """
+ insert into ${table_name} values (2, '{"nested": [{"a": 2.5, "b":
123.1}]}');
+ """
+ // for cloud we should select first and then desc for syncing rowset
to get latest schema
+ sql """
+ select * from ${table_name} order by k limit 1;
+ """
+ qt_sql_desc_2 """
+ desc ${table_name};
+ """
+ // now select for a, b, c
+ sql_select_batch()
+ sql_test_cast_to_array()
+
+ // trigger a full compaction and wait for it
+ trigger_and_wait_compaction("${table_name}", "full")
+
+ // run the same selects and casts again after compaction
+ sql_select_batch()
+ sql_test_cast_to_array()
+
+ sql """ truncate table ${table_name} """
+
+
+ // type conflict inside a single row: a, b (and c) appear with different types in the two array elements
+ sql """
+ insert into ${table_name} values (1, '{"nested": [{"a": 1, "b":
1.1}, {"a": "1", "b": "1", "c": "1"}]}');
+ """
+ // for cloud we should select first and then desc for syncing rowset
to get latest schema
+ sql """
+ select * from ${table_name} order by k limit 1;
+ """
+ qt_sql_desc_4 """
+ desc ${table_name};
+ """
+ // now select for a, b, c
+ sql_select_batch()
+ sql_test_cast_to_array()
+
+ // insert a second row in which c is a double
+ sql """
+ insert into ${table_name} values (2, '{"nested": [{"a": 1, "c":
1.1}]}');
+ """
+ // for cloud we should select first and then desc for syncing rowset
to get latest schema
+ sql """
+ select * from ${table_name} order by k limit 1;
+ """
+ qt_sql_desc_5 """
+ desc ${table_name};
+ """
+ // now select for a, b, c
+ sql_select_batch()
+ sql_test_cast_to_array()
+
+ // trigger a full compaction and wait for it
+ trigger_and_wait_compaction("${table_name}", "full")
+
+ // run the same selects and casts again after compaction
+ sql_select_batch()
+ sql_test_cast_to_array()
+
+ } finally { // no cleanup is performed here
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]