This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 2e01cc5c8a2 [fix](tableFuncs) fix explode_json_array_funcs (#39572) 2e01cc5c8a2 is described below commit 2e01cc5c8a20dd3a7200c109bddaccd720c85e4c Author: amory <wangqian...@selectdb.com> AuthorDate: Wed Aug 21 18:05:37 2024 +0800 [fix](tableFuncs) fix explode_json_array_funcs (#39572) 1. explode_json_array_json accept jsonb type and return jsonb type or string type and return string type 2. fix explode_json_array return empty set issue --- .../exprs/table_function/vexplode_json_array.cpp | 4 +- be/src/vec/functions/function_fake.cpp | 51 +++++++++++++++------- be/src/vec/functions/function_fake.h | 5 +++ .../functions/generator/ExplodeJsonArrayJson.java | 4 +- .../generator/ExplodeJsonArrayJsonOuter.java | 4 +- .../data/nereids_function_p0/gen_function/gen.out | 41 +++++++++++++++++ .../table_function/explode_json_array.out | 16 ++++++- .../nereids_function_p0/gen_function/gen.groovy | 5 ++- .../table_function/explode_json_array.groovy | 6 ++- 9 files changed, 114 insertions(+), 22 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index f72c8ec25ae..3c22ef4e078 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -52,7 +52,7 @@ Status VExplodeJsonArrayTableFunction<DataImpl>::process_init(Block* block, Runt RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, &text_column_idx)); _text_column = block->get_by_position(text_column_idx).column; - _text_datatype = block->get_by_position(text_column_idx).type; + _text_datatype = remove_nullable(block->get_by_position(text_column_idx).type); return Status::OK(); } @@ -155,4 +155,4 @@ template class VExplodeJsonArrayTableFunction<ParsedDataDouble>; template class VExplodeJsonArrayTableFunction<ParsedDataString>; template class VExplodeJsonArrayTableFunction<ParsedDataJSON>; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index 62d5fe4e893..c7edcf4df8f 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -38,7 +38,7 @@ namespace doris::vectorized { -template <typename ReturnType, bool AlwaysNullable = false> +template <typename ReturnType, bool AlwaysNullable = false, bool VARIADIC = false> struct FunctionFakeBaseImpl { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { if constexpr (AlwaysNullable) { @@ -46,6 +46,16 @@ struct FunctionFakeBaseImpl { } return std::make_shared<ReturnType>(); } + static DataTypes get_variadic_argument_types() { + if constexpr (VARIADIC) { + if constexpr (AlwaysNullable) { + return {make_nullable(std::make_shared<ReturnType>())}; + } + return {std::make_shared<ReturnType>()}; + } else { + return {}; + } + } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -55,6 +65,7 @@ struct FunctionExplode { return make_nullable( check_and_get_data_type<DataTypeArray>(arguments[0].get())->get_nested_type()); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -67,6 +78,7 @@ struct FunctionExplodeMap { fieldTypes[1] = check_and_get_data_type<DataTypeMap>(arguments[0].get())->get_value_type(); return make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes)); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -80,6 +92,7 @@ struct FunctionExplodeJsonObject { fieldTypes[1] = make_nullable(std::make_shared<DataTypeJsonb>()); return make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes)); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -87,6 +100,7 @@ struct FunctionEsquery { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { return FunctionFakeBaseImpl<DataTypeUInt8>::get_return_type_impl(arguments); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "esquery only supported on es table"; } }; @@ -102,11 +116,13 @@ void register_table_function_expand(SimpleFunctionFactory& factory, const std::s factory.register_function<FunctionFake<FunctionImpl>>(name + suffix); }; -template <typename ReturnType> +template <typename ReturnType, bool VARIADIC> void register_table_function_expand_default(SimpleFunctionFactory& factory, const std::string& name, const std::string& suffix) { - factory.register_function<FunctionFake<FunctionFakeBaseImpl<ReturnType>>>(name); - factory.register_function<FunctionFake<FunctionFakeBaseImpl<ReturnType, true>>>(name + suffix); + factory.register_function<FunctionFake<FunctionFakeBaseImpl<ReturnType, false, VARIADIC>>>( + name); + factory.register_function<FunctionFake<FunctionFakeBaseImpl<ReturnType, true, VARIADIC>>>( + name + suffix); }; template <typename FunctionImpl> @@ -114,10 +130,11 @@ void register_table_function_expand_outer(SimpleFunctionFactory& factory, const register_table_function_expand<FunctionImpl>(factory, name, COMBINATOR_SUFFIX_OUTER); }; -template <typename ReturnType> +template <typename ReturnType, bool VARIADIC> void register_table_function_expand_outer_default(SimpleFunctionFactory& factory, const std::string& name) { - register_table_function_expand_default<ReturnType>(factory, name, COMBINATOR_SUFFIX_OUTER); + register_table_function_expand_default<ReturnType, VARIADIC>(factory, name, + COMBINATOR_SUFFIX_OUTER); }; void register_function_fake(SimpleFunctionFactory& factory) { @@ -127,15 +144,19 @@ void register_function_fake(SimpleFunctionFactory& factory) { register_table_function_expand_outer<FunctionExplodeMap>(factory, "explode_map"); register_table_function_expand_outer<FunctionExplodeJsonObject>(factory, "explode_json_object"); - register_table_function_expand_outer_default<DataTypeString>(factory, "explode_split"); - register_table_function_expand_outer_default<DataTypeInt32>(factory, "explode_numbers"); - register_table_function_expand_outer_default<DataTypeInt64>(factory, "explode_json_array_int"); - register_table_function_expand_outer_default<DataTypeString>(factory, - "explode_json_array_string"); - register_table_function_expand_outer_default<DataTypeJsonb>(factory, "explode_json_array_json"); - register_table_function_expand_outer_default<DataTypeFloat64>(factory, - "explode_json_array_double"); - register_table_function_expand_outer_default<DataTypeInt64>(factory, "explode_bitmap"); + register_table_function_expand_outer_default<DataTypeString, false>(factory, "explode_split"); + register_table_function_expand_outer_default<DataTypeInt32, false>(factory, "explode_numbers"); + register_table_function_expand_outer_default<DataTypeInt64, false>(factory, + "explode_json_array_int"); + register_table_function_expand_outer_default<DataTypeString, false>( + factory, "explode_json_array_string"); + register_table_function_expand_outer_default<DataTypeJsonb, true>(factory, + "explode_json_array_json"); + register_table_function_expand_outer_default<DataTypeString, true>(factory, + "explode_json_array_json"); + register_table_function_expand_outer_default<DataTypeFloat64, false>( + factory, "explode_json_array_double"); + register_table_function_expand_outer_default<DataTypeInt64, false>(factory, "explode_bitmap"); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.h b/be/src/vec/functions/function_fake.h index fa2b69092ae..d180cbf3270 100644 --- a/be/src/vec/functions/function_fake.h +++ b/be/src/vec/functions/function_fake.h @@ -44,6 +44,7 @@ struct UDTFImpl { static std::string get_error_msg() { return "UDTF function do not support this, it's should execute with lateral view."; } + static DataTypes get_variadic_argument_types() { return {}; } }; // FunctionFake is use for some function call expr only work at prepare/open phase, do not support execute(). @@ -64,6 +65,10 @@ public: return Impl::get_return_type_impl(arguments); } + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + bool use_default_implementation_for_nulls() const override { if constexpr (std::is_same_v<Impl, UDTFImpl>) { return false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java index 2f8d27d2e4a..6209f23a7dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -35,7 +36,8 @@ import java.util.List; */ public class ExplodeJsonArrayJson extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( - FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java index acfc3209963..ab358855196 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -35,7 +36,8 @@ import java.util.List; */ public class ExplodeJsonArrayJsonOuter extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( - FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/regression-test/data/nereids_function_p0/gen_function/gen.out b/regression-test/data/nereids_function_p0/gen_function/gen.out index 17f86d875e0..286a05ee85b 100644 --- a/regression-test/data/nereids_function_p0/gen_function/gen.out +++ b/regression-test/data/nereids_function_p0/gen_function/gen.out @@ -757,6 +757,47 @@ 11 {"id":2,"name":"Mary"} 11 {"id":3,"name":"Bob"} +-- !sql_explode_json_array_json_Json -- +\N {"id":1,"name":"John"} +\N {"id":2,"name":"Mary"} +\N {"id":3,"name":"Bob"} +0 {"id":1,"name":"John"} +0 {"id":2,"name":"Mary"} +0 {"id":3,"name":"Bob"} +1 {"id":1,"name":"John"} +1 {"id":2,"name":"Mary"} +1 {"id":3,"name":"Bob"} +2 {"id":1,"name":"John"} +2 {"id":2,"name":"Mary"} +2 {"id":3,"name":"Bob"} +3 {"id":1,"name":"John"} +3 {"id":2,"name":"Mary"} +3 {"id":3,"name":"Bob"} +4 {"id":1,"name":"John"} +4 {"id":2,"name":"Mary"} +4 {"id":3,"name":"Bob"} +5 {"id":1,"name":"John"} +5 {"id":2,"name":"Mary"} +5 {"id":3,"name":"Bob"} +6 {"id":1,"name":"John"} +6 {"id":2,"name":"Mary"} +6 {"id":3,"name":"Bob"} +7 {"id":1,"name":"John"} +7 {"id":2,"name":"Mary"} +7 {"id":3,"name":"Bob"} +8 {"id":1,"name":"John"} +8 {"id":2,"name":"Mary"} +8 {"id":3,"name":"Bob"} +9 {"id":1,"name":"John"} +9 {"id":2,"name":"Mary"} +9 {"id":3,"name":"Bob"} +10 {"id":1,"name":"John"} +10 {"id":2,"name":"Mary"} +10 {"id":3,"name":"Bob"} +11 {"id":1,"name":"John"} +11 {"id":2,"name":"Mary"} +11 {"id":3,"name":"Bob"} + -- !sql_explode_Double -- 0 0.1 1 0.2 diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out index ccc012e1121..f75b56b3305 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out @@ -79,7 +79,21 @@ \N 80 3 \N 80 b --- !outer_join_explode_json_array11 -- +-- !outer_join_explode_json_array111 -- +\N \N {"id":1,"name":"John"} +\N \N {"id":2,"name":"Mary"} +\N \N {"id":3,"name":"Bob"} +\N 30 {"id":1,"name":"John"} +\N 30 {"id":2,"name":"Mary"} +\N 30 {"id":3,"name":"Bob"} +\N 50 {"id":1,"name":"John"} +\N 50 {"id":2,"name":"Mary"} +\N 50 {"id":3,"name":"Bob"} +\N 80 {"id":1,"name":"John"} +\N 80 {"id":2,"name":"Mary"} +\N 80 {"id":3,"name":"Bob"} + +-- !outer_join_explode_json_array112 -- \N \N {"id":1,"name":"John"} \N \N {"id":2,"name":"Mary"} \N \N {"id":3,"name":"Bob"} diff --git a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy index 7fa0ea5c681..7f30c9a2b6a 100644 --- a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy +++ b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy @@ -60,7 +60,10 @@ suite("nereids_gen_fn") { select id, e from fn_test lateral view explode_json_array_string('["1", "2", "3"]') lv as e order by id, e''' qt_sql_explode_json_array_json_Varchar ''' - select id, e from fn_test lateral view explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, cast(e as string)''' + select id, e from fn_test lateral view explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, e''' + + qt_sql_explode_json_array_json_Json ''' + select id, e from fn_test lateral view explode_json_array_json(cast('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]' as json)) lv as e order by id, cast(e as string); ''' // explode order_qt_sql_explode_Double "select id, e from fn_test lateral view explode(kadbl) lv as e order by id, e" diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy index e4b13c96dd5..edc1bc7fa1a 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy @@ -60,8 +60,12 @@ suite("explode_json_array") { qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_STRING('[1, "b", 3]') TMP AS e1) AS T ORDER BY age, e1""" - qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM + qt_outer_join_explode_json_array111 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_JSON('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') + TMP AS e1) AS T ORDER BY age, e1""" + + qt_outer_join_explode_json_array112 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM + person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_JSON(cast('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]' as Json)) TMP AS e1) AS T ORDER BY age, cast(e1 as string)""" qt_explode_json_array12 """ SELECT c_age, COUNT(1) FROM person --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org